Skip to content

Commit 8cfeeaa

Browse files
committed
ver25.01.17
1 parent: b650b73 · commit: 8cfeeaa

File tree

5 files changed

+611,577 additions, -48 deletions (lines changed)

5 files changed

+611,577 additions, -48 deletions (lines changed)

.idea/markdown.xml

Lines changed: 9 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

conf.py

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -18,13 +18,15 @@
1818
# youtube asl dataset
1919
ROOT = os.path.dirname(os.path.abspath(__file__))
2020
ID = "youtube-asl_youtube_asl_video_ids.txt"
21+
2122
VIDEO_DIR = f"{ROOT}/dataset/origin/"
2223
OUTPUT_DIR = f"{ROOT}/dataset/npy/"
24+
CSV_FILE = f"youtube_asl.csv"
25+
2326
TRANSCRIPT_DIR = f"{ROOT}/dataset/transcript/"
24-
CSV_FILE = f"video_info.csv"
2527
DURATION = 16
2628
OVERLAP = 4
27-
MAX_WORKERS = 8
29+
MAX_WORKERS = 2
2830
LANGUAGE = [
2931
"en",
3032
"ase",

s2_transcript_preprocess.py

Lines changed: 26 additions & 41 deletions
Original file line number | Diff line number | Diff line change
@@ -56,62 +56,46 @@ def read_transcript_file(json_file):
5656
return json.load(file)
5757

5858

59-
def process_transcript_segments(
60-
transcripts, video_id, duration=c.DURATION, overlap=c.OVERLAP
61-
):
59+
def process_transcript_segments(transcripts, video_id):
6260
"""
63-
Splits transcript into overlapping segments of specified duration.
61+
Processes individual transcript captions, filtering based on length and duration constraints.
6462
6563
Args:
6664
transcripts (list): List of transcript dictionaries
6765
video_id (str): Video identifier for naming segments
68-
duration (float): Length of each segment in seconds
69-
overlap (float): Overlap duration between segments
7066
7167
Returns:
72-
list: List of segmented and processed transcript dictionaries
68+
list: List of processed transcript dictionaries meeting the criteria
7369
"""
7470
processed_segments = []
75-
segment_start = 0
76-
segment_index = -1
71+
segment_index = 0
7772

7873
# Filter valid transcript entries
79-
valid_entries = [t for t in transcripts if "text" in t and "start" in t]
74+
valid_entries = [t for t in transcripts if "text" in t and "start" in t and "duration" in t]
8075
if not valid_entries:
8176
print(f"No valid transcripts for video {video_id}")
8277
return processed_segments
8378

84-
final_timestamp = valid_entries[-1]["start"]
85-
86-
while segment_start < final_timestamp:
87-
segment_end = segment_start + duration
88-
segment_texts = []
89-
90-
# Collect text within current segment window
91-
for entry in valid_entries:
92-
if segment_start <= entry["start"] < segment_start + duration:
93-
segment_texts.append(entry["text"])
94-
segment_end = max(
95-
segment_end, entry["start"] + entry.get("duration", 0)
96-
)
97-
98-
processed_text = normalize_text(" ".join(segment_texts))
99-
segment_start += duration - overlap
100-
segment_index += 1
101-
102-
if not processed_text:
103-
continue
104-
105-
segment_data = {
106-
"SENTENCE_NAME": f"{video_id}-{segment_index:03d}",
107-
"START": segment_start - (duration - overlap),
108-
"END": float(np.ceil(segment_end)),
109-
"SENTENCE": processed_text,
110-
}
111-
processed_segments.append(segment_data)
112-
113-
if valid_entries[-1]["text"] in segment_texts:
114-
break
79+
for entry in valid_entries:
80+
# Get the normalized text
81+
processed_text = normalize_text(entry["text"])
82+
83+
# Apply filtering criteria:
84+
# - Text length <= 300 characters
85+
# - Duration between 0.2s and 60s
86+
if (len(processed_text) <= 300 and
87+
0.2 <= entry["duration"] <= 60.0 and
88+
processed_text): # Ensure non-empty text
89+
90+
segment_data = {
91+
"VIDEO_NAME": video_id,
92+
"SENTENCE_NAME": f"{video_id}-{segment_index:03d}",
93+
"START_REALIGNED": entry["start"],
94+
"END_REALIGNED": entry["start"] + entry["duration"],
95+
"SENTENCE": processed_text,
96+
}
97+
processed_segments.append(segment_data)
98+
segment_index += 1
11599

116100
return processed_segments
117101

@@ -130,6 +114,7 @@ def save_segments_to_csv(segment_data, csv_path):
130114

131115
df.to_csv(
132116
csv_path,
117+
sep="\t",
133118
mode=mode,
134119
header=header,
135120
index=False,

s3_mediapipe_labelling.py

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -25,12 +25,12 @@ def read_timestamp_data(csv_file: str) -> Dict[str, List[float]]:
2525
Dict[str, List[float]]: Dictionary mapping segment names to [start, end] timestamps
2626
"""
2727
try:
28-
df = pd.read_csv(csv_file, delimiter=",", on_bad_lines="skip")[
29-
["SENTENCE_NAME", "START", "END"]
28+
df = pd.read_csv(csv_file, delimiter="\t", on_bad_lines="skip")[
29+
["SENTENCE_NAME", "START_REALIGNED", "END_REALIGNED"]
3030
].dropna()
3131
return (
32-
df.set_index("SENTENCE_NAME")[["START", "END"]]
33-
.apply(lambda row: [row["START"], row["END"]], axis=1)
32+
df.set_index("SENTENCE_NAME")[["START_REALIGNED", "END_REALIGNED"]]
33+
.apply(lambda row: [row["START_REALIGNED"], row["END_REALIGNED"]], axis=1)
3434
.to_dict()
3535
)
3636
except Exception as e:
@@ -135,7 +135,7 @@ def process_video_segment(
135135

136136
# Determine frame skip rate based on video FPS
137137
fps = cap.get(cv2.CAP_PROP_FPS)
138-
frame_skip = 6 if fps > 60 else (4 if fps > 40 else (2 if fps > 20 else 1))
138+
frame_skip = 1 if fps <= 16 else c.FRAME_SKIP
139139

140140
# Calculate frame ranges
141141
start_frame, end_frame = int(start_time * fps), int(end_time * fps)

0 commit comments

Comments
 (0)