Skip to content

Commit 605f499

Browse files
committed
ver 24.12.19
1. add how2sign mediapipe preprocess
1 parent 6373d90 commit 605f499

File tree

3 files changed

+155
-156
lines changed

3 files changed

+155
-156
lines changed

H2S_mediapipe.py

Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
import os
2+
import cv2
3+
import mediapipe as mp
4+
import numpy as np
5+
import logging
6+
from glob import glob
7+
from typing import List
8+
from concurrent.futures import ProcessPoolExecutor
9+
10+
import conf as c
11+
12+
logging.basicConfig(level=logging.DEBUG)
13+
logger = logging.getLogger(__name__)
14+
15+
mp_holistic = mp.solutions.holistic
16+
17+
18+
def find_video_files(directory: str, pattern: str = "*.mp4") -> List[str]:
    """Return the base names (no directory, no extension) of matching files.

    Args:
        directory: Directory to search (not recursive).
        pattern: Glob pattern applied inside ``directory``; defaults to
            ``"*.mp4"`` but any pattern (e.g. ``"*.npy"``) may be given.

    Returns:
        The sorted list of base names. Only the last extension is stripped,
        so ``"clip.v2.mp4"`` yields ``"clip.v2"``. A missing or empty
        directory yields an empty list.
    """
    # glob() makes no ordering guarantee; sorting keeps the processing order
    # reproducible from run to run.
    return sorted(
        os.path.splitext(os.path.basename(path))[0]
        for path in glob(os.path.join(directory, pattern))
    )
24+
25+
26+
def mediapipe_detection(image, model):
    """Convert a BGR frame to RGB and return ``model.process`` on it."""
    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    return model.process(rgb_frame)
28+
29+
30+
def extract_keypoints(results):
    """Flatten the selected pose, face and hand landmarks into one vector.

    For each landmark group on ``results`` the (x, y, z) coordinates of the
    indices configured in ``c.POSE_IDX``, ``c.FACE_IDX`` and ``c.HAND_IDX``
    are collected. A group that is absent (or has no landmarks) contributes
    zeros of the same size, so the output length does not depend on what was
    detected.

    Returns:
        A 1-D array laid out as pose, face, left hand, right hand.
    """

    def _select(group, indices):
        # `group` is None when nothing was detected; getattr with a default
        # turns that into a falsy value instead of raising.
        points = getattr(group, "landmark", None)
        if not points:
            return np.zeros((len(indices), 3))
        return np.array([[points[i].x, points[i].y, points[i].z] for i in indices])

    pose = _select(results.pose_landmarks, c.POSE_IDX)
    left_hand = _select(results.left_hand_landmarks, c.HAND_IDX)
    right_hand = _select(results.right_hand_landmarks, c.HAND_IDX)
    face = _select(results.face_landmarks, c.FACE_IDX)

    ordered_parts = (pose, face, left_hand, right_hand)
    return np.concatenate([part.flatten() for part in ordered_parts])
54+
55+
56+
def process_video(video_path: str, output_file: str):
    """Extract holistic keypoints from a video and save them as a .npy file.

    When the reported frame count exceeds ``c.MAX_FRAME``, only every
    ``ceil(total_frames / c.MAX_FRAME)``-th frame is analysed, which keeps
    the number of analysed frames at or below ``c.MAX_FRAME``.

    Args:
        video_path: Path of the video to read.
        output_file: Destination ``.npy`` path. Its parent directory is
            created if needed.

    Returns:
        None. Nothing is written if the video cannot be opened or if every
        extracted keypoint is zero (no detections at all).
    """
    cap = cv2.VideoCapture(video_path)
    all_landmarks = []
    try:
        if not cap.isOpened():
            logger.error(f"Error opening video: {video_path}")
            return

        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        frame_skip = 1
        if total_frames > c.MAX_FRAME:
            # Keep the stride an int so the modulo below is integer arithmetic.
            frame_skip = int(np.ceil(total_frames / c.MAX_FRAME))

        with mp_holistic.Holistic(
            model_complexity=1,
            refine_face_landmarks=True,
            min_detection_confidence=0.5,
            min_tracking_confidence=0.5,
        ) as holistic:
            for current_frame in range(total_frames):
                # grab() advances the stream; the frame is only decoded via
                # retrieve() when it is actually going to be analysed.
                if not cap.grab():
                    break
                if current_frame % frame_skip != 0:
                    continue
                ret, frame = cap.retrieve()
                if not ret:
                    break
                results = mediapipe_detection(frame, holistic)
                all_landmarks.append(extract_keypoints(results))
    finally:
        # Release the capture even if detection or extraction raises.
        cap.release()

    data_array = np.array(all_landmarks)
    if data_array.size > 0 and np.any(data_array):
        output_dir = os.path.dirname(output_file)
        # A bare filename has an empty dirname, which os.makedirs rejects.
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)
        np.save(output_file, data_array)
        logger.info(f"Saved landmarks to {output_file}")
    else:
        logger.info(f"No valid landmarks for video {video_path}, not saving.")
96+
97+
98+
def main():
    """Extract keypoints for every How2Sign video that has no output yet.

    Videos are looked up in ``c.H2S_VIDEO_DIR``; a video is skipped when a
    ``.npy`` file with the same base name already exists in
    ``c.H2S_OUTPUT_DIR``. The remaining videos are processed in a pool of
    ``c.MAX_WORKERS`` processes, and any failure in a worker is logged.
    """
    available_videos = find_video_files(c.H2S_VIDEO_DIR)
    # A set makes the "already processed?" check O(1) per video.
    existed_files = set(find_video_files(c.H2S_OUTPUT_DIR, pattern="*.npy"))

    tasks = []
    for video_name in available_videos:
        video_path = os.path.join(c.H2S_VIDEO_DIR, f"{video_name}.mp4")
        output_file = os.path.join(c.H2S_OUTPUT_DIR, f"{video_name}.npy")
        if video_name not in existed_files:
            tasks.append((video_path, output_file))
        else:
            logger.info(f"Skipping existing file: {output_file}")

    # Use multiple processors
    with ProcessPoolExecutor(max_workers=c.MAX_WORKERS) as executor:
        submitted = [
            (video_path, executor.submit(process_video, video_path, output_file))
            for video_path, output_file in tasks
        ]
        # Without inspecting the futures, an exception raised in a worker
        # (or a crashed worker process) would vanish silently.
        for video_path, future in submitted:
            error = future.exception()
            if error is not None:
                logger.error(f"Failed to process {video_path}: {error!r}")
115+
116+
117+
if __name__ == "__main__":
118+
main()

conf.py

Lines changed: 37 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,31 +1,48 @@
11
import os
22

3-
3+
# gpu setting
44
USE_GPU = True
55

6+
# how2sign dataset
7+
H2S_VIDEO_DIR = "dataset/how2sign/"
8+
H2S_OUTPUT_DIR = "dataset/how2sign/npy/"
69

7-
# video downloader
10+
# youtube asl dataset
811
ROOT = os.path.dirname(os.path.abspath(__file__))
9-
ID = 'youtube-asl_youtube_asl_video_ids.txt'
10-
VIDEO_DIR = f'{ROOT}/dataset/origin/'
11-
OUTPUT_DIR = f'{ROOT}/dataset/10fps/'
12-
TRANSCRIPT_DIR = f'{ROOT}/dataset/transcript/'
13-
# transcript_dir = f'{root}/dataset/test/'
14-
CSV_FILE = f'video_info.csv'
15-
12+
ID = "youtube-asl_youtube_asl_video_ids.txt"
13+
VIDEO_DIR = f"{ROOT}/dataset/origin/"
14+
OUTPUT_DIR = f"{ROOT}/dataset/npy/"
15+
TRANSCRIPT_DIR = f"{ROOT}/dataset/transcript/"
16+
CSV_FILE = f"video_info.csv"
17+
MAX_FRAME = 512
1618
DURATION = 16
1719
OVERLAP = 4
18-
MAX_WORKERS = 1
19-
LANGUAGE = ['en', 'ase', 'en-US', 'en-CA', 'en-GB', 'en-AU', 'en-NZ', 'en-IN', 'en-ZA', 'en-IE', 'en-SG', 'en-PH', 'en-NG', 'en-PK', 'en-JM']
20+
MAX_WORKERS = 8
21+
LANGUAGE = [
22+
"en",
23+
"ase",
24+
"en-US",
25+
"en-CA",
26+
"en-GB",
27+
"en-AU",
28+
"en-NZ",
29+
"en-IN",
30+
"en-ZA",
31+
"en-IE",
32+
"en-SG",
33+
"en-PH",
34+
"en-NG",
35+
"en-PK",
36+
"en-JM",
37+
]
2038

21-
# mediapipe
22-
POSE_IDX = [11, 12, 13, 14, 23, 24]
23-
FACE_IDX = [0, 4, 13, 14, 17, 33, 37, 39, 46, 52, 55, 61, 64, 81, 82, 93,
24-
133, 151, 152, 159, 172, 178, 181, 263, 269, 276, 282, 285, 291,
25-
294, 311, 323, 362, 386, 397, 468, 473]
39+
# mediapipe landmark indices
2640
HAND_IDX = list(range(21))
41+
POSE_IDX = [11, 12, 13, 14, 23, 24]
42+
FACE_IDX = [
43+
0, 4, 13, 14, 17, 33, 37, 39, 46, 52, 55, 61, 64, 81, 82, 93,
44+
133, 151, 152, 159, 172, 178, 181, 263, 269, 276, 282, 285, 291,
45+
294, 311, 323, 362, 386, 397, 468, 473
46+
]
47+
2748

28-
# import numpy as np
29-
# data = np.load('dataset/10fps/UznY5SfH0RI-015.npy')
30-
# print(data.shape)
31-
# print(data)

test.py

Lines changed: 0 additions & 136 deletions
This file was deleted.

0 commit comments

Comments
 (0)