我想要將 Vegas Pro 中的字幕和文字導出並生成 YouTube 的 Chapter 章節。一開始我嘗試了 Tools 裡的 Scripting,但是並沒有成功。後來找到 Vegasaur Toolkit 這個工具,它有 30 天免費試用,可以將軌道上的文字導出成 srt 字幕文件。然後我使用 ffmpeg 將 srt 轉換成 vtt 字幕文件,最後再通過 Python 腳本將其轉換成 YouTube 支持的章節文本文檔。
下載 Vegasaur Toolkit,並安裝。我的 Vegas Pro 版本是 18,我安裝的 Vegasaur Toolkit 版本是 3.9.5。安裝完,打開 .veg 文件,鼠標選中要導出的字幕軌道,然後點擊 View > Extensions > Vegasaur > Timeline > Text Generation Wizard。在彈出的窗口中選擇 Export text events,然後點 Next。然後選擇 Selected Tracks 和 Save to File 並設置要導出的文件名和文件格式。最後點擊 Finish 就導出了。
剩下的步驟我使用了腳本來輔助完成。我有兩條字幕軌道,章節文本基本上都位於第一條軌道,但是偶爾第二條軌道也會有,所以我還需要合併一下兩個軌道導出的字幕文件:
#vtt utils
import subprocess
import sys
from datetime import timedelta
#usage:
#srt to youtube chapters
#python main.py srt2yt 1.srt 2.srt
def add_style_to_vtt(input_file, output_file, style):
    """Copy a VTT file, appending cue settings to every cue timing line.

    Every line containing ``-->`` is rewritten as
    ``<start> --> <end> [existing settings] <style>``; timestamps written
    without an hours field (``mm:ss.mmm``) are expanded to
    ``00:mm:ss.mmm``. All other lines are copied unchanged.

    Args:
        input_file: Path of the VTT file to read (UTF-8).
        output_file: Path of the VTT file to write (UTF-8).
        style: Cue settings text appended after the end timestamp,
            e.g. ``'position:100% align:right size:50%'``.

    Raises:
        ValueError: If a timing line has nothing after ``-->``.
    """
    # Explicit UTF-8: the captions contain non-ASCII text and the other
    # readers/writers in this script use UTF-8 as well.
    with open(input_file, 'r', encoding='utf-8') as f:
        lines = f.readlines()

    with open(output_file, 'w', encoding='utf-8') as f:
        for line in lines:
            if '-->' in line:
                start, _, remainder = line.strip().partition('-->')
                start = start.strip()
                # The end timestamp may already be followed by cue settings;
                # separate them so the hours prefix is applied to the
                # timestamp itself and the old settings are preserved.
                end, *existing_settings = remainder.split()
                # A timestamp with a single ':' has no hours field.
                if start.count(':') < 2:
                    start = '00:' + start
                if end.count(':') < 2:
                    end = '00:' + end
                line = ' '.join(
                    [f'{start} --> {end}', *existing_settings, style]
                ) + '\n'
            f.write(line)
def combine_two_vtt(input_file_1, input_file_2, output_file):
    """Merge the cues of two VTT files into one file ordered by start time.

    Both inputs are parsed with ``parse_vtt_file`` and the concatenated
    cue list is handed to ``write_sorted_vtt``, which sorts and writes it.

    Args:
        input_file_1: Path of the first VTT file.
        input_file_2: Path of the second VTT file.
        output_file: Path of the merged VTT file to create.
    """
    merged_cues = [
        *parse_vtt_file(input_file_1),
        *parse_vtt_file(input_file_2),
    ]
    write_sorted_vtt(merged_cues, output_file)
def time_to_seconds(time_str):
    """Convert an ``hh:mm:ss[.fff]`` or ``mm:ss[.fff]`` string to seconds.

    Args:
        time_str: Colon-separated time; the hours field is optional.

    Returns:
        The time as a float number of seconds.

    Raises:
        ValueError: If the string does not have two or three
            colon-separated numeric fields.
    """
    fields = [float(part) for part in time_str.split(':')]
    if len(fields) == 3:
        hours, minutes, seconds = fields
    elif len(fields) == 2:
        # No hours field present.
        hours = 0.0
        minutes, seconds = fields
    else:
        raise ValueError(f"Unexpected time format: {time_str!r}")
    return hours * 3600 + minutes * 60 + seconds
def seconds_to_time(seconds):
    """Format a number of seconds as ``HH:MM:SS.mmm``.

    The value is rounded to whole milliseconds before it is split into
    fields, so a value such as 59.9996 carries into the minutes field
    (``00:01:00.000``) instead of producing an invalid ``60.000`` seconds
    field.

    Args:
        seconds: Time in seconds (int or float).

    Returns:
        The formatted time string.
    """
    total_ms = round(seconds * 1000)
    hours, remainder_ms = divmod(total_ms, 3_600_000)
    minutes, remainder_ms = divmod(remainder_ms, 60_000)
    whole_seconds, millis = divmod(remainder_ms, 1000)
    return f'{hours:02d}:{minutes:02d}:{whole_seconds:02d}.{millis:03d}'
def youtube_chapters(input_file, output_file):
    """Write one chapter line per VTT cue: ``<start time> <caption text>``.

    For each cue the start time is the text of the timing line up to its
    first ``.``; the caption lines that follow (up to the next blank line)
    are joined with spaces. When a cue has exactly three caption lines the
    middle one is dropped. A cue starting less than 10 seconds after the
    previously written time is moved to that time plus 10 seconds.

    NOTE: the previous time starts at ``'00:00'``, so a first cue that
    begins before 00:10 is also moved to 10 seconds. Adjusted times are
    written as ``HH:MM:SS.mmm`` (the ``seconds_to_time`` format), while
    unadjusted ones keep the form found in the file without the fraction.

    Args:
        input_file: Path of the VTT file to read (UTF-8).
        output_file: Path of the chapter text file to write (UTF-8).
    """
    # Explicit UTF-8, matching the other VTT readers/writers in this script.
    with open(input_file, 'r', encoding='utf-8') as f:
        lines = f.readlines()

    with open(output_file, 'w', encoding='utf-8') as f:
        last_time = '00:00'
        i = 0
        while i < len(lines):
            if '-->' in lines[i]:
                # Start time without the fractional part.
                first_dot_index = lines[i].find('.')
                current_time = lines[i][:first_dot_index]
                # Keep consecutive chapters at least 10 seconds apart.
                gap = time_to_seconds(current_time) - time_to_seconds(last_time)
                if gap < 10:
                    current_time = seconds_to_time(
                        time_to_seconds(last_time) + 10
                    )
                last_time = current_time

                caption_list = [current_time]
                # Collect caption lines until a blank (or whitespace-only)
                # line, the same terminator the other parsers here use.
                while i < len(lines) - 1 and lines[i + 1].strip():
                    caption_list.append(lines[i + 1].strip())
                    i += 1
                if len(caption_list) == 4:
                    # time + three caption lines: drop the middle line.
                    caption_list.pop(2)
                f.write(' '.join(caption_list) + '\n')
            i += 1
def parse_vtt_timestamp(ts):
    """Parse a VTT timestamp into a ``timedelta``.

    Accepts ``hh:mm:ss.fff`` and ``mm:ss.fff``. The fractional part is
    optional, may use ``,`` instead of ``.`` (SRT style), and is read as a
    decimal fraction of a second truncated to milliseconds (``.5`` is
    500 ms).

    Args:
        ts: Timestamp text; surrounding whitespace is ignored.

    Returns:
        The timestamp as a ``timedelta``.

    Raises:
        ValueError: If the text is not a recognisable timestamp.
    """
    parts = ts.strip().split(":")
    if len(parts) == 3:
        h, m, s = parts
    elif len(parts) == 2:
        h = 0
        m, s = parts
    else:
        raise ValueError(f"Unexpected timestamp format: {ts}")

    whole, _, fraction = s.replace(",", ".").partition(".")
    if fraction and not fraction.isdigit():
        raise ValueError(f"Unexpected timestamp format: {ts}")
    # Pad/truncate to exactly three digits so the fraction is always
    # interpreted as milliseconds.
    ms = int(fraction.ljust(3, "0")[:3]) if fraction else 0
    return timedelta(hours=int(h), minutes=int(m), seconds=int(whole), milliseconds=ms)
def parse_vtt_file(file_path):
    """Read a VTT file and return its cues.

    Args:
        file_path: Path of the VTT file (UTF-8).

    Returns:
        A list of ``(start_time, timing_line, content_lines)`` tuples in
        file order, where ``start_time`` is the ``timedelta`` parsed from
        the text before ``" --> "``, ``timing_line`` is the stripped timing
        line and ``content_lines`` are the following lines (trailing
        newline removed) up to the next blank line.
    """
    with open(file_path, 'r', encoding='utf-8') as handle:
        raw_lines = handle.readlines()

    cues = []
    total = len(raw_lines)
    cursor = 0
    while cursor < total:
        header = raw_lines[cursor].strip()
        cursor += 1
        if "-->" not in header:
            # Not a timing line (file header, blank line, stray text).
            continue

        begins_at = parse_vtt_timestamp(header.split(" --> ")[0])
        # Everything up to the next blank line belongs to this cue.
        payload = []
        while cursor < total and raw_lines[cursor].strip() != "":
            payload.append(raw_lines[cursor].rstrip('\n'))
            cursor += 1
        cues.append((begins_at, header, payload))
    return cues
def write_sorted_vtt(entries, output_path):
    """Sort cues by start time and write them out as a VTT file.

    Args:
        entries: List of ``(start_time, timing_line, content_lines)``
            tuples; the list is sorted in place by ``start_time``.
        output_path: Path of the VTT file to write (UTF-8).
    """
    entries.sort(key=lambda cue: cue[0])
    with open(output_path, 'w', encoding='utf-8') as out:
        out.write("WEBVTT\n\n")
        for _, timing_line, body in entries:
            out.write(f"{timing_line}\n")
            out.writelines(f"{text}\n" for text in body)
            # Blank line terminates the cue.
            out.write("\n")
def format_youtube_time(td):
    """Format a ``timedelta`` as a chapter timestamp.

    Args:
        td: Offset from the start of the video; fractions of a second are
            discarded.

    Returns:
        ``HH:MM:SS`` when the offset is one hour or more, else ``MM:SS``.
    """
    whole_seconds = int(td.total_seconds())
    hh, remainder = divmod(whole_seconds, 3600)
    mm, ss = divmod(remainder, 60)
    if hh > 0:
        return f"{hh:02}:{mm:02}:{ss:02}"
    return f"{mm:02}:{ss:02}"
def parse_vtt_for_chapters(file_path):
    """Extract chapter entries from a VTT file.

    Only cues with at least two text lines are considered: the first line
    is taken as the Chinese name and the last line as the English name.
    When several cues share the same Chinese name, only the one with the
    longest duration is kept (and its start time is used).

    Args:
        file_path: Path of the VTT file (UTF-8).

    Returns:
        A list of ``(start, chinese_name, english_name)`` tuples sorted by
        ``start`` (a ``timedelta``).

    Raises:
        ValueError: If a timing line has no end timestamp or a timestamp
            cannot be parsed.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        lines = f.readlines()

    entries = []
    i = 0
    while i < len(lines):
        line = lines[i].strip()
        i += 1
        if "-->" not in line:
            continue

        start_str, _, remainder = line.partition("-->")
        # The end timestamp may be followed by cue settings (for example
        # those added by the "style" command); only the first token is the
        # timestamp.
        end_fields = remainder.split()
        if not end_fields:
            raise ValueError(f"Malformed cue timing line: {line}")
        start = parse_vtt_timestamp(start_str.strip())
        end = parse_vtt_timestamp(end_fields[0])
        duration = end - start

        # Cue text runs until the next blank line.
        content = []
        while i < len(lines) and lines[i].strip():
            content.append(lines[i].strip())
            i += 1

        # Chinese name on the first line, English name on the last line.
        if len(content) >= 2:
            entries.append((content[0], content[-1], start, duration))

    # Deduplicate: keep only the longest-duration entry per Chinese name.
    longest = {}
    for cname, ename, start, duration in entries:
        if cname not in longest or duration > longest[cname][1]:
            longest[cname] = (ename, duration, start)

    # Sort by start time.
    return sorted(
        [(start, cname, ename) for cname, (ename, _, start) in longest.items()],
        key=lambda chapter: chapter[0],
    )
def write_youtube_chapters(chapters, output_path):
    """Write chapters as ``<time> <chinese name> <english name>`` lines.

    Args:
        chapters: Iterable of ``(start, chinese_name, english_name)``
            tuples, ``start`` being a ``timedelta``.
        output_path: Path of the text file to write (UTF-8).
    """
    with open(output_path, 'w', encoding='utf-8') as out:
        for chapter in chapters:
            start, cname, ename = chapter
            out.write(f"{format_youtube_time(start)} {cname} {ename}\n")
if __name__ == "__main__":
    # Command-line entry point: the first argument selects the operation,
    # the remaining arguments are its inputs. Output files are written next
    # to the first input file, with its extension replaced.
    import os  # local import: only the CLI needs it (extension stripping)

    usage = (
        "usage:\n"
        "  python main.py combine <1.vtt> <2.vtt>\n"
        "  python main.py style <file.vtt> <cue settings>\n"
        "  python main.py youtube <file.vtt>\n"
        "  python main.py srt2yt <1.srt> <2.srt>"
    )
    # Number of arguments each command needs after the command name.
    required_args = {'combine': 2, 'style': 2, 'youtube': 1, 'srt2yt': 2}

    func = sys.argv[1] if len(sys.argv) > 1 else None
    args = sys.argv[2:]
    # Reject unknown commands and missing arguments up front instead of
    # failing later with an IndexError (or silently doing nothing).
    if func not in required_args or len(args) < required_args[func]:
        sys.exit(usage)

    # Path of the first input without its extension, whatever its length.
    base = os.path.splitext(args[0])[0]

    if func == 'combine':
        # Merge two VTT files into <first>.combined.vtt.
        combine_two_vtt(args[0], args[1], base + '.combined.vtt')
    elif func == 'style':
        # Append cue settings to every cue, e.g.
        #   'position:100% align:right size:50%'
        #   'position:0% align:left size:50%'
        add_style_to_vtt(args[0], base + '.style.vtt', args[1])
    elif func == 'youtube':
        # Turn one VTT file into <file>.youtube.txt.
        chapters = parse_vtt_for_chapters(args[0])
        write_youtube_chapters(chapters, base + '.youtube.txt')
    elif func == 'srt2yt':
        # Full pipeline: convert both SRT files to VTT with ffmpeg, merge
        # them, then write the chapter list.
        vtt1 = base + '.vtt'
        vtt2 = os.path.splitext(args[1])[0] + '.vtt'
        subprocess.run(['ffmpeg', '-i', args[0], vtt1])
        subprocess.run(['ffmpeg', '-i', args[1], vtt2])
        # Pause so the ffmpeg output can be checked before continuing.
        input("Press enter to continue...")
        combined_file = base + '.combined.vtt'
        combine_two_vtt(vtt1, vtt2, combined_file)
        chapters = parse_vtt_for_chapters(combined_file)
        write_youtube_chapters(chapters, base + '.youtube.txt')
腳本中還有一些 Youtube 支持的 vtt 格式字幕樣式的嘗試。