# Files
# Face_Recognition/features_extraction_to_csv.py
#
# 483 lines
# 18 KiB
# Python
# Executable File

# Copyright (C) 2018-2021 coneypo
# SPDX-License-Identifier: MIT
import os
import dlib
import csv
import numpy as np
import logging
import cv2
from PIL import Image
# Directory holding the face images to read (main() treats each sub-folder as one person)
path_images_from_camera = "data/data_faces"
# Dlib models: 68-point landmark predictor and the face recognition (descriptor) network
predictor = dlib.shape_predictor('models/dlib/shape_predictor_68_face_landmarks.dat')
face_reco_model = dlib.face_recognition_model_v1("models/dlib/dlib_face_recognition_resnet_model_v1.dat")
class DNNFaceDetector:
    """Face detector that wraps OpenCV and returns dlib-compatible rectangles.

    Three back-ends are tried in order when the detector is created:

    1. the TensorFlow SSD model (``models/opencv_face_detector_uint8.pb``),
    2. the Caffe SSD model (``models/res10_300x300_ssd_iter_140000_fp16.caffemodel``),
    3. a Haar cascade classifier as a last resort.

    Attributes:
        confidence_threshold: Minimum DNN confidence for a detection to be kept.
        use_tensorflow: True when the TensorFlow model was loaded.
        use_haar: True when a Haar cascade classifier was loaded.
        net: The loaded OpenCV network/classifier, or None when nothing
            could be loaded.
    """

    # Locations probed for the Haar cascade file, in priority order.
    _HAAR_CASCADE_PATHS = (
        'models/haarcascade_frontalface_default.xml',
        '/usr/share/opencv4/haarcascades/haarcascade_frontalface_default.xml',
        '/usr/share/opencv/haarcascades/haarcascade_frontalface_default.xml',
        'haarcascade_frontalface_default.xml',
    )

    def __init__(self, confidence_threshold=0.7):
        """Initialise the detector and load the first available model.

        Args:
            confidence_threshold: Detections with a confidence not greater
                than this value are discarded (DNN back-ends only).
        """
        self.confidence_threshold = confidence_threshold
        # Always define the back-end flags so no hasattr() probing is needed.
        self.use_tensorflow = False
        self.use_haar = False
        self.net = self.load_face_detection_model()

    def load_face_detection_model(self):
        """Load a face detection model, trying TensorFlow, Caffe, then Haar.

        Returns:
            The loaded ``cv2.dnn`` network or ``cv2.CascadeClassifier``, or
            None when no model file could be found/loaded.
        """
        # Option 1: TensorFlow model
        try:
            model_path = "models/opencv_face_detector_uint8.pb"
            config_path = "models/opencv_face_detector.pbtxt"
            if os.path.exists(model_path) and os.path.exists(config_path):
                net = cv2.dnn.readNetFromTensorflow(model_path, config_path)
                print("使用 TensorFlow 人脸检测模型")
                self.use_tensorflow = True
                return net
        except Exception as e:
            # Best effort: report and fall through to the next back-end.
            print(f"TensorFlow模型加载失败: {e}")

        # Option 2: Caffe model
        try:
            proto_path = "models/deploy.prototxt"
            model_path = "models/res10_300x300_ssd_iter_140000_fp16.caffemodel"
            if os.path.exists(proto_path) and os.path.exists(model_path):
                net = cv2.dnn.readNetFromCaffe(proto_path, model_path)
                print("使用 Caffe 人脸检测模型")
                self.use_tensorflow = False
                return net
        except Exception as e:
            print(f"Caffe模型加载失败: {e}")

        # Option 3: Haar cascade fallback
        print("使用 Haar Cascade 作为备选检测器")
        for path in self._HAAR_CASCADE_PATHS:
            if not os.path.exists(path):
                continue
            classifier = cv2.CascadeClassifier(path)
            # A file that exists but cannot be parsed yields an empty
            # classifier; skip it rather than failing later at detection time.
            if classifier.empty():
                continue
            # Only flag Haar mode once a usable classifier really exists, so
            # detect_faces() never dispatches to Haar with net=None.
            self.use_haar = True
            return classifier

        print("错误: 未找到任何人脸检测模型")
        return None

    def detect_faces(self, image):
        """Detect faces in an image.

        Args:
            image: Image array as returned by ``cv2.imread``
                (only ``image.shape[:2]`` and OpenCV calls are applied to it).

        Returns:
            A list of ``(dlib.rectangle, confidence)`` tuples; empty when no
            model is loaded or no face passes the threshold.
        """
        # Check for a missing model first: without it no back-end can run.
        if self.net is None:
            return []
        if self.use_haar:
            return self.detect_faces_haar(image)

        h, w = image.shape[:2]

        # Build the network input blob.
        # NOTE(review): the TensorFlow branch uses mean (104, 117, 123) while
        # the Caffe branch uses (104, 177, 123); kept as in the original -
        # confirm which values are intended.
        if self.use_tensorflow:
            # Trailing positional flags are swapRB=False, crop=False.
            blob = cv2.dnn.blobFromImage(image, 1.0, (300, 300), [104, 117, 123], False, False)
        else:
            blob = cv2.dnn.blobFromImage(cv2.resize(image, (300, 300)), 1.0,
                                         (300, 300), (104.0, 177.0, 123.0))

        self.net.setInput(blob)
        detections = self.net.forward()

        faces = []
        for i in range(detections.shape[2]):
            confidence = float(detections[0, 0, i, 2])
            if confidence <= self.confidence_threshold:
                continue

            # Both DNN back-ends are read with the same layout here: columns
            # 3..6 are scaled by the image width/height to get pixel corners.
            x1 = int(detections[0, 0, i, 3] * w)
            y1 = int(detections[0, 0, i, 4] * h)
            x2 = int(detections[0, 0, i, 5] * w)
            y2 = int(detections[0, 0, i, 6] * h)

            # Clamp the box to the image bounds.
            x1, y1 = max(0, x1), max(0, y1)
            x2, y2 = min(w, x2), min(h, y2)

            # Keep only non-degenerate boxes, converted to dlib rectangles
            # for compatibility with the dlib landmark predictor.
            if x2 > x1 and y2 > y1:
                faces.append((dlib.rectangle(x1, y1, x2, y2), confidence))
        return faces

    def detect_faces_haar(self, image):
        """Fallback detection using the Haar cascade classifier.

        Args:
            image: BGR image array (it is converted with ``COLOR_BGR2GRAY``).

        Returns:
            A list of ``(dlib.rectangle, 0.9)`` tuples. Haar cascades provide
            no confidence, so a fixed value of 0.9 is reported.
        """
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        faces_cv = self.net.detectMultiScale(
            gray,
            scaleFactor=1.1,
            minNeighbors=5,
            minSize=(30, 30),
        )
        faces = []
        for (x, y, w, h) in faces_cv:
            # Cast numpy integers to plain ints before building the rectangle.
            x, y, w, h = int(x), int(y), int(w), int(h)
            faces.append((dlib.rectangle(x, y, x + w, y + h), 0.9))
        return faces


# Global detector instance shared by the feature-extraction functions
dnn_detector = DNNFaceDetector(confidence_threshold=0.7)
def return_128d_features(path_img):
    """Return the 128D face descriptor of a single image.

    The image is read with OpenCV, faces are located with the global
    ``dnn_detector``, and the highest-confidence face is passed to dlib for
    landmark prediction and descriptor computation.

    Args:
        path_img: Path of the image file to process.

    Returns:
        The descriptor returned by ``face_reco_model.compute_face_descriptor``,
        or None when the image cannot be read, no face is detected, or any
        error occurs while processing.
    """
    try:
        # Read directly with OpenCV (no PIL conversion).
        # NOTE(review): the array from cv2.imread is handed to dlib unchanged;
        # confirm the channel order matches what the recognition side uses.
        img_rd = cv2.imread(path_img)
        if img_rd is None:
            logging.error("无法读取图像: %s", path_img)
            return None

        h, w = img_rd.shape[:2]
        if w > 1000 or h > 1000:
            # Large image: detect on a downscaled copy for speed, then map
            # the rectangles back to the original resolution.
            scale = 1000 / max(w, h)
            small_img = cv2.resize(img_rd, (int(w * scale), int(h * scale)))
            face_results = [
                (
                    dlib.rectangle(
                        int(rect.left() / scale),
                        int(rect.top() / scale),
                        int(rect.right() / scale),
                        int(rect.bottom() / scale),
                    ),
                    confidence,
                )
                for rect, confidence in dnn_detector.detect_faces(small_img)
            ]
        else:
            face_results = dnn_detector.detect_faces(img_rd)

        if not face_results:
            logging.warning("未检测到人脸: %s", path_img)
            return None

        # Use the face with the highest detection confidence.
        face_rect, best_confidence = max(face_results, key=lambda item: item[1])
        logging.info("%-40s %-20s", "检测到人脸的图像:", path_img)
        logging.info("检测置信度: %.3f", best_confidence)

        # Landmarks and descriptor are computed by dlib on the full-size image.
        shape = predictor(img_rd, face_rect)
        return face_reco_model.compute_face_descriptor(img_rd, shape)
    except Exception as e:
        # Deliberate best effort: one bad image must not abort the batch.
        logging.error("处理图像时出错 %s: %s", path_img, e)
        return None
def return_features_mean_personX(path_face_personX):
    """Return the mean 128D feature vector of all face images in a folder.

    Args:
        path_face_personX: Directory containing the images of one person.

    Returns:
        A numpy array with the element-wise mean of the descriptors that
        could be extracted. When no descriptor could be extracted (empty
        folder, no image files, or no detectable faces) a zero vector of
        length 128 is returned instead.
    """
    image_extensions = ('.jpg', '.jpeg', '.png', '.bmp')
    features_list_personX = []

    # Sort so that files are processed (and logged) in a deterministic order.
    photos_list = sorted(os.listdir(path_face_personX))
    if not photos_list:
        logging.warning("文件夹为空: %s", path_face_personX)

    for photo in photos_list:
        # Only image files are processed.
        if not photo.lower().endswith(image_extensions):
            continue
        photo_path = os.path.join(path_face_personX, photo)
        logging.info("正在读取图像: %s", photo_path)
        features_128d = return_128d_features(photo_path)
        if features_128d is not None:
            features_list_personX.append(features_128d)
        else:
            logging.warning("无法从图像中提取特征: %s", photo_path)

    # Average the collected descriptors.
    if features_list_personX:
        features_mean_personX = np.array(features_list_personX).mean(axis=0)
        logging.info("成功提取 %d 张人脸特征", len(features_list_personX))
    else:
        # The all-zero vector is the "nothing extracted" marker for callers.
        features_mean_personX = np.zeros(128, dtype=np.float64)
        logging.warning("未提取到任何特征: %s", path_face_personX)
    return features_mean_personX
def get_person_name_from_folder(folder_name):
    """Derive a meaningful person name from a folder name.

    A known prefix (``person_``, ``face_``, ``user_``) is stripped when the
    remainder is a real name. If the remainder is empty or purely numeric,
    the full folder name is kept so the result is never blank or a bare
    number.

    Args:
        folder_name: Name of the person's folder.

    Returns:
        The name to use for this person.
    """
    # Common folder prefixes
    prefixes = ('person_', 'face_', 'user_')
    for prefix in prefixes:
        if folder_name.startswith(prefix):
            name_part = folder_name[len(prefix):]
            # Nothing (or only digits) after the prefix: keep the full name.
            if not name_part or name_part.isdigit():
                return folder_name
            return name_part
    return folder_name
def main():
    """Extract the mean face features of every person folder into a CSV file.

    Each sub-folder of ``path_images_from_camera`` is treated as one person.
    For every folder whose features could be extracted, one row is written
    to ``data/features_all.csv``: the person name followed by the values of
    the mean feature vector. Folders without usable features are skipped.
    """
    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

    # The source must be an existing directory, otherwise listdir would fail.
    if not os.path.isdir(path_images_from_camera):
        logging.error("人脸图像文件夹不存在: %s", path_images_from_camera)
        return

    # Collect the person folders in a stable order.
    person_list = sorted(os.listdir(path_images_from_camera))
    if not person_list:
        logging.error("没有人脸文件夹可处理")
        return
    logging.info("找到 %d 个人脸文件夹: %s", len(person_list), person_list)

    successful_count = 0
    with open("data/features_all.csv", "w", newline="", encoding="utf-8") as csvfile:
        writer = csv.writer(csvfile)
        for person_folder in person_list:
            folder_path = os.path.join(path_images_from_camera, person_folder)
            if not os.path.isdir(folder_path):
                logging.warning("跳过非文件夹: %s", person_folder)
                continue

            logging.info("=" * 60)
            logging.info("处理文件夹: %s", person_folder)

            # Extract the mean feature vector for this person.
            features_mean = return_features_mean_personX(folder_path)

            # Resolve a meaningful name from the folder name.
            person_name = get_person_name_from_folder(person_folder)
            logging.info("使用姓名: %s", person_name)

            # An all-zero vector means no feature could be extracted.
            if np.all(features_mean == 0):
                logging.warning("特征提取失败,跳过: %s", person_folder)
                continue

            # Row layout: name followed by the feature values.
            row_data = [person_name] + features_mean.tolist()
            writer.writerow(row_data)
            successful_count += 1
            logging.info("完成: %s", person_name)
            logging.info("-" * 50)

    # Summary is logged once the CSV file has been closed.
    logging.info("=" * 60)
    logging.info("处理完成: 成功 %d/%d 个人脸文件夹", successful_count, len(person_list))
    logging.info("特征数据已保存到: data/features_all.csv")
# Run the extraction only when executed as a script, not when imported.
if __name__ == '__main__':
    main()