import cv2
import dlib
import time
import numpy as np
from scipy.spatial import distance as dist
import pygame
import sys
import math
# ================= CONFIG =================
FACE_DOWNSAMPLE_RATIO = 1.5   # face *detection* runs on a frame shrunk by this factor
RESIZE_HEIGHT = 460           # working frame height (px); frames are scaled to this
# NOTE(review): the three time thresholds below are never referenced by the
# visible code — the detection loops use frame-count thresholds instead.
# Confirm whether these were meant to drive those counts.
blinkTime = 0.15
drowsyTime = 1.5
yawnTime = 1.0
modelPath = "models/shape_predictor_68_face_landmarks.dat"  # dlib 68-landmark model file
sound_path = "alarm.wav"      # alarm sound looped while an alert is active
# ==========================================
# ================= AUDIO ===================
# Initialize pygame's mixer and pre-load the alarm track.
pygame.mixer.init()
pygame.mixer.music.load(sound_path)
# ==========================================
# ================= DLIB ====================
# HOG-based frontal face detector + 68-point landmark predictor.
detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor(modelPath)
# ==========================================
# ================= LANDMARK INDEX ==========
# Index ranges into the 68-point dlib landmark layout.
leftEyeIndex = [36, 37, 38, 39, 40, 41]
rightEyeIndex = [42, 43, 44, 45, 46, 47]
mouthIndex = [48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59]
# ==========================================
# ================= VARIABLES ===============
blinkCount = 0     # total blinks counted (reset with 'r' key)
yawnCount = 0      # total yawns counted (reset with 'r' key)
eyeState = 0       # consecutive frames with EAR below threshold
mouthState = 0     # consecutive frames with MAR above threshold
drowsy = False     # True while an over-long eye closure is in progress
alarm_on = False   # tracks whether the looping alarm is currently playing
adaptiveEAR = 0    # per-user eye-closed threshold, set during calibration
adaptiveMAR = 0    # per-user yawn threshold, set during calibration
# ==========================================
# ================= FUNCTIONS ===============
def get_head_pose(landmarks, frame_size):
    """Estimate head orientation from 2D landmarks.

    Solves a PnP problem between six detected landmark points and a generic
    6-point 3D face model, approximating the camera intrinsics from the
    frame size (focal length ~ frame width, principal point at the center).

    Returns (pitch, yaw, roll) Euler angles in degrees.
    """
    # 2D points: nose tip, chin, left/right eye outer corners, mouth corners.
    point_ids = (30, 8, 36, 45, 48, 54)
    image_points = np.array([landmarks[i] for i in point_ids], dtype="double")

    # Matching generic 3D face-model coordinates (arbitrary units).
    model_points = np.array([
        (0.0, 0.0, 0.0),           # Nose tip
        (0.0, -330.0, -65.0),      # Chin
        (-225.0, 170.0, -135.0),   # Left eye
        (225.0, 170.0, -135.0),    # Right eye
        (-150.0, -150.0, -125.0),  # Left mouth
        (150.0, -150.0, -125.0),   # Right mouth
    ])

    height, width = frame_size[0], frame_size[1]
    focal_length = width  # crude pinhole approximation
    camera_matrix = np.array([
        [focal_length, 0, width / 2],
        [0, focal_length, height / 2],
        [0, 0, 1],
    ], dtype="double")

    dist_coeffs = np.zeros((4, 1))  # assume no lens distortion
    # NOTE(review): the solvePnP success flag is deliberately not checked
    # (matches prior behavior); a failed solve would yield garbage angles.
    _, rotation_vector, _ = cv2.solvePnP(
        model_points,
        image_points,
        camera_matrix,
        dist_coeffs,
        flags=cv2.SOLVEPNP_ITERATIVE,
    )
    rmat, _ = cv2.Rodrigues(rotation_vector)
    angles = cv2.RQDecomp3x3(rmat)[0]  # Euler angles in degrees
    return angles[0], angles[1], angles[2]
def eye_aspect_ratio(eye):
    """Return the eye aspect ratio (EAR) of six eye landmarks.

    EAR = (|p1-p5| + |p2-p4|) / (2 * |p0-p3|): the two vertical eyelid
    distances over twice the horizontal eye width. Approaches zero as
    the eye closes.
    """
    vertical = dist.euclidean(eye[1], eye[5]) + dist.euclidean(eye[2], eye[4])
    horizontal = dist.euclidean(eye[0], eye[3])
    return vertical / (2.0 * horizontal)
def mouth_aspect_ratio(mouth):
    """Return the mouth aspect ratio (MAR) of twelve outer-lip landmarks.

    MAR = (|p2-p10| + |p4-p8|) / (2 * |p0-p6|): two vertical lip distances
    over twice the mouth width. Grows as the mouth opens (yawning).
    """
    opening = dist.euclidean(mouth[2], mouth[10]) + dist.euclidean(mouth[4], mouth[8])
    width = dist.euclidean(mouth[0], mouth[6])
    return opening / (2.0 * width)
def get_landmarks(frame):
    """Detect the first face in `frame` and return its 68 landmarks.

    Face detection runs on a downsampled grayscale copy for speed; the
    detected rectangle is then scaled back up and the landmark predictor
    runs on the full-resolution grayscale image.

    Returns a list of 68 (x, y) tuples, or None when no face is found.
    """
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    inv_ratio = 1.0 / FACE_DOWNSAMPLE_RATIO
    small = cv2.resize(gray, None, fx=inv_ratio, fy=inv_ratio)

    faces = detector(small, 0)
    if len(faces) == 0:
        return None

    # Map the detection on the small image back to full-resolution coords.
    face = faces[0]
    full_rect = dlib.rectangle(
        int(face.left() * FACE_DOWNSAMPLE_RATIO),
        int(face.top() * FACE_DOWNSAMPLE_RATIO),
        int(face.right() * FACE_DOWNSAMPLE_RATIO),
        int(face.bottom() * FACE_DOWNSAMPLE_RATIO),
    )
    prediction = predictor(gray, full_rect)
    return [(pt.x, pt.y) for pt in prediction.parts()]
# ==========================================
# ================= CAMERA ==================
cap = cv2.VideoCapture(0)  # default webcam
if not cap.isOpened():
    print("Camera error")
    sys.exit()
# ==========================================
# ================= CALIBRATION (ADAPTIVE) ==
# Collect calib_frames baseline samples of EAR/MAR while the user keeps
# eyes open and mouth closed, then derive per-user thresholds from the means.
print("Calibration... Keep eyes open & mouth closed")
ear_samples = []
mar_samples = []
calib_frames = 60
# Frames with a failed read or no detected face are skipped, so this loop
# blocks until enough valid face frames have been gathered.
while len(ear_samples) < calib_frames:
    ret, frame = cap.read()
    if not ret:
        continue
    # Scale the frame so its height equals RESIZE_HEIGHT (same as main loop).
    h = frame.shape[0]
    scale = h / RESIZE_HEIGHT
    frame = cv2.resize(frame, None, fx=1/scale, fy=1/scale)
    landmarks = get_landmarks(frame)
    if landmarks is None:
        continue
    leftEye = [landmarks[i] for i in leftEyeIndex]
    rightEye = [landmarks[i] for i in rightEyeIndex]
    mouth = [landmarks[i] for i in mouthIndex]
    ear = (eye_aspect_ratio(leftEye) + eye_aspect_ratio(rightEye)) / 2.0
    mar = mouth_aspect_ratio(mouth)
    ear_samples.append(ear)
    mar_samples.append(mar)
print("Calibration Done")
# Eye-closed threshold: 75% of the open-eye baseline EAR.
adaptiveEAR = np.mean(ear_samples) * 0.75
# Yawn threshold: closed-mouth baseline MAR plus a fixed margin.
adaptiveMAR = np.mean(mar_samples) + 0.15
# ================= MAIN LOOP ===============
# Per frame: normalize size, find landmarks, update blink/drowsy/yawn state,
# estimate head pose, draw the overlay, and drive the alarm.
while True:
    ret, frame = cap.read()
    if not ret:
        break
    # Normalize frame height to RESIZE_HEIGHT (same scaling as calibration).
    h = frame.shape[0]
    scale = h / RESIZE_HEIGHT
    frame = cv2.resize(frame, None, fx=1/scale, fy=1/scale)
    landmarks = get_landmarks(frame)
    if landmarks is None:
        # No face this frame: still show the feed and allow ESC to quit.
        cv2.imshow("Driver Monitoring", frame)
        if cv2.waitKey(1) & 0xFF == 27:
            break
        continue
    # ===== EAR =====
    leftEye = [landmarks[i] for i in leftEyeIndex]
    rightEye = [landmarks[i] for i in rightEyeIndex]
    ear = (eye_aspect_ratio(leftEye) + eye_aspect_ratio(rightEye)) / 2.0
    # ===== MAR =====
    mouth = [landmarks[i] for i in mouthIndex]
    mar = mouth_aspect_ratio(mouth)
    # ===== BLINK & DROWSY =====
    # eyeState counts consecutive frames with eyes closed: a short run
    # (>2 frames) ending counts as one blink; a long run (>15) is drowsiness.
    if ear < adaptiveEAR:
        eyeState += 1
    else:
        if eyeState > 2:
            blinkCount += 1
        eyeState = 0
    drowsy = eyeState > 15
    # ===== YAWN =====
    # mouthState counts consecutive open-mouth frames; a run longer than
    # 10 frames counts as one yawn once the mouth closes again.
    if mar > adaptiveMAR:
        mouthState += 1
    else:
        if mouthState > 10:
            yawnCount += 1
        mouthState = 0
    pitch, yaw, roll = get_head_pose(landmarks, frame.shape)
    pitch = -pitch  # flip so positive = looking up, negative = looking down
    # ===== DRAW LANDMARKS =====
    for i in leftEyeIndex + rightEyeIndex + mouthIndex:
        cv2.circle(frame, landmarks[i], 1, (0, 0, 255), -1)
    # ===== DISPLAY =====
    cv2.putText(frame, f"EAR: {round(ear,2)}",
                (20, 40), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255,255,0), 2)
    cv2.putText(frame, f"MAR: {round(mar,2)}",
                (20, 70), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255,255,0), 2)
    cv2.putText(frame, f"Blinks: {blinkCount}",
                (350, 40), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0,255,0), 2)
    cv2.putText(frame, f"Yawns: {yawnCount}",
                (350, 70), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0,255,255), 2)
    cv2.putText(frame, f"Yaw: {round(yaw, 1)}",
                (20, 110), cv2.FONT_HERSHEY_SIMPLEX,
                0.6, (255, 255, 0), 2)
    cv2.putText(frame, f"Pitch: {round(pitch, 1)}",
                (20, 140), cv2.FONT_HERSHEY_SIMPLEX,
                0.6, (255, 255, 0), 2)
    # ===== DISTRACTION DETECTION =====
    if abs(yaw) > 20:
        cv2.putText(frame, "LOOKING SIDEWAYS!",
                    (60, 170), cv2.FONT_HERSHEY_COMPLEX,
                    0.7, (0, 165, 255), 2)
    # ===== HEAD DOWN ALERT =====
    # NOTE(review): with the sign flip above, "pitch > -20" holds for any
    # near-level pose, so this banner may display almost constantly —
    # confirm the intended threshold/sign against observed pitch values.
    if pitch > -20:
        cv2.putText(frame, "HEAD DOWN!",
                    (60, 200), cv2.FONT_HERSHEY_COMPLEX,
                    0.7, (0, 0, 255), 2)
    # ===== ALERT =====
    # Alarm while drowsy or while a long mouth-open run is in progress;
    # play looped (-1) and stop as soon as the condition clears.
    if drowsy or mouthState > 10:
        cv2.putText(frame, "!!! ALERT !!!",
                    (60, 120), cv2.FONT_HERSHEY_COMPLEX,
                    0.8, (0,0,255), 2)
        if not alarm_on:
            pygame.mixer.music.play(-1)
            alarm_on = True
    elif alarm_on:
        pygame.mixer.music.stop()
        alarm_on = False
    cv2.imshow("Driver Monitoring", frame)
    # FIX: mask to the low byte so key codes compare correctly on platforms
    # where waitKey returns values with high bits set (the no-face branch
    # above already masked; this branch previously did not).
    key = cv2.waitKey(1) & 0xFF
    if key == ord('r'):
        # Reset the running blink/yawn counters.
        blinkCount = 0
        yawnCount = 0
    elif key == 27:  # ESC quits
        break
# ================= CLEANUP =================
pygame.mixer.music.stop()
pygame.quit()
cap.release()
cv2.destroyAllWindows()