我想要將 Vegas Pro 中的字幕和文字導出並生成 YouTube 的 Chapter 章節。一開始我嘗試了 Tools 裡的 Scripting,但是並沒有成功。後來找到 Vegasaur Toolkit 這個工具,它有 30 天免費試用,可以將軌道上的文字導出成 srt 字幕文件。然後我使用 ffmpeg 將 srt 轉換成 vtt 字幕文件,最後再通過 Python 腳本將其轉換成 YouTube 支持的章節文本文檔。
下載 Vegasaur Toolkit,並安裝。我的 Vegas Pro 版本是 18,我安裝的 Vegasaur Toolkit 版本是 3.9.5。安裝完成後,打開 .veg 文件,鼠標選中要導出的字幕軌道,然後點擊 View > Extensions > Vegasaur > Timeline > Text Generation Wizard。在彈出的窗口中選擇 Export text events,然後點 Next。接著選擇 Selected Tracks 和 Save to File,並設置要導出的文件名和文件格式。最後點擊 Finish 就導出了。
剩下的步驟我使用了腳本來輔助完成。我有兩條字幕軌道,章節文本基本上都位於第一條軌道,但是偶爾第二條軌道也會有,所以我還需要合併一下兩個軌道導出的字幕文件:
# vtt utils
#
# Usage:
#   python main.py combine 1.vtt 2.vtt      -> 1.combined.vtt
#   python main.py style in.vtt "<style>"   -> in.style.vtt
#   python main.py youtube in.vtt           -> in.youtube.txt
#   python main.py srt2yt 1.srt 2.srt       -> 1.youtube.txt (srt to youtube chapters)
import os
import subprocess
import sys
from datetime import timedelta


def _split_cue_timing(line):
    """Split a cue timing line into its start and end timestamp strings.

    Anything following the end timestamp (cue settings such as
    ``position:0% align:left``) is ignored.

    Raises:
        ValueError: if the line has no end timestamp after ``-->``.
    """
    start, _, rest = line.strip().partition('-->')
    end_tokens = rest.split()
    if not end_tokens:
        raise ValueError(f"Unexpected cue timing line: {line!r}")
    return start.strip(), end_tokens[0]


def _pad_hours(timestamp):
    """Return ``timestamp`` with a leading ``00:`` if it has no hours field."""
    if timestamp.count(':') == 1:
        return '00:' + timestamp
    return timestamp


def _with_suffix(path, suffix):
    """Replace the extension of ``path`` (whatever its length) with ``suffix``."""
    return os.path.splitext(path)[0] + suffix


def add_style_to_vtt(input_file, output_file, style):
    """Copy a VTT file, appending ``style`` to every cue timing line.

    Timestamps without an hours field (``MM:SS.mmm``) are expanded to
    ``HH:MM:SS.mmm``. Cue settings already present on a timing line are kept
    and ``style`` is appended after them. Files are read and written as UTF-8.

    Args:
        input_file: path of the source VTT file.
        output_file: path of the styled VTT file to write.
        style: cue settings text, e.g. ``'position:0% align:left size:50%'``.
    """
    with open(input_file, 'r', encoding='utf-8') as f:
        lines = f.readlines()
    with open(output_file, 'w', encoding='utf-8') as f:
        for line in lines:
            if '-->' in line:
                start, _, rest = line.strip().partition('-->')
                end_parts = rest.split(None, 1)
                # A timing line with nothing after the arrow is malformed;
                # it is passed through untouched rather than guessed at.
                if end_parts:
                    start = _pad_hours(start.strip())
                    end = _pad_hours(end_parts[0])
                    existing = end_parts[1].strip() + ' ' if len(end_parts) > 1 else ''
                    line = f'{start} --> {end} {existing}{style}\n'
            f.write(line)


def combine_two_vtt(input_file_1, input_file_2, output_file):
    """Merge the cues of two VTT files into one file ordered by start time."""
    entries = parse_vtt_file(input_file_1) + parse_vtt_file(input_file_2)
    write_sorted_vtt(entries, output_file)


def time_to_seconds(time_str):
    """Convert ``HH:MM:SS[.fff]`` or ``MM:SS[.fff]`` to seconds as a float.

    Raises:
        ValueError: if the string does not have two or three ``:`` fields or
            a field is not numeric.
    """
    parts = time_str.split(':')
    if len(parts) == 3:
        hours, minutes, seconds = map(float, parts)
    elif len(parts) == 2:
        hours = 0.0
        minutes, seconds = map(float, parts)
    else:
        raise ValueError(f"Unexpected time format: {time_str}")
    return hours * 3600 + minutes * 60 + seconds


def seconds_to_time(seconds):
    """Format a number of seconds as ``HH:MM:SS.mmm``."""
    hours = int(seconds // 3600)
    minutes = int((seconds % 3600) // 60)
    seconds = seconds % 60
    return f'{hours:02d}:{minutes:02d}:{seconds:06.3f}'


def youtube_chapters(input_file, output_file):
    """Legacy converter: write one ``<time> <caption...>`` line per VTT cue.

    Each cue's start time (without the fractional part) is followed by its
    caption lines joined with spaces. A cue starting less than 10 seconds
    after the previous written time is moved to exactly 10 seconds after it.
    The first cue is never moved, so a chapter at 00:00 stays at 00:00.
    When a cue yields exactly three caption lines, the second one is dropped.
    """
    with open(input_file, 'r', encoding='utf-8') as f:
        lines = f.readlines()
    with open(output_file, 'w', encoding='utf-8') as f:
        last_seconds = None
        i = 0
        while i < len(lines):
            if '-->' in lines[i]:
                start = lines[i].split('-->')[0].strip()
                # Drop the fractional seconds; chapter lists use whole seconds.
                current_time = start.partition('.')[0]
                current_seconds = time_to_seconds(current_time)
                if last_seconds is not None and current_seconds - last_seconds < 10:
                    current_seconds = last_seconds + 10
                    current_time = seconds_to_time(current_seconds).partition('.')[0]
                last_seconds = current_seconds
                caption_list = [current_time]
                # Collect caption lines up to the blank line that ends the cue.
                while i < len(lines) - 1 and lines[i + 1] != '\n':
                    caption_list.append(lines[i + 1].strip())
                    i += 1
                if len(caption_list) == 4:
                    # time + three caption lines: remove the middle caption line
                    caption_list.pop(2)
                f.write(' '.join(caption_list) + '\n')
            i += 1


def parse_vtt_timestamp(ts):
    """Parse a VTT timestamp (``[HH:]MM:SS[.fff]``) into a timedelta.

    The fractional part is optional and is interpreted as a decimal fraction
    of a second (``.5`` is 500 ms).

    Raises:
        ValueError: if the timestamp does not have two or three ``:`` fields
            or a field is not numeric.
    """
    parts = ts.strip().split(":")
    if len(parts) == 3:
        h, m, s = parts
    elif len(parts) == 2:
        h = 0
        m, s = parts
    else:
        raise ValueError(f"Unexpected timestamp format: {ts}")
    whole, _, fraction = s.partition(".")
    # Right-pad / truncate the fraction to six digits so it maps to microseconds.
    microseconds = int((fraction + "000000")[:6]) if fraction else 0
    return timedelta(hours=int(h), minutes=int(m), seconds=int(whole),
                     microseconds=microseconds)


def parse_vtt_file(file_path):
    """Read a VTT file and return its cues.

    Returns:
        A list of ``(start_time, timing_line, content_lines)`` tuples, where
        ``start_time`` is a timedelta, ``timing_line`` is the stripped
        original timing line and ``content_lines`` is the list of caption
        lines up to the next blank line.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        lines = f.readlines()

    entries = []
    i = 0
    while i < len(lines):
        line = lines[i].strip()
        if "-->" in line:
            start_str, _ = _split_cue_timing(line)
            start_time = parse_vtt_timestamp(start_str)
            content = []
            i += 1
            while i < len(lines) and lines[i].strip() != "":
                content.append(lines[i].rstrip('\n'))
                i += 1
            entries.append((start_time, line, content))
        else:
            i += 1
    return entries


def write_sorted_vtt(entries, output_path):
    """Write cues to ``output_path`` as a VTT file ordered by start time.

    ``entries`` uses the tuple layout returned by ``parse_vtt_file``. The
    caller's list is left unmodified.
    """
    ordered = sorted(entries, key=lambda entry: entry[0])
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write("WEBVTT\n\n")
        for _, timestamp, content in ordered:
            f.write(f"{timestamp}\n")
            for line in content:
                f.write(f"{line}\n")
            f.write("\n")


def format_youtube_time(td):
    """Format a timedelta as ``MM:SS``, or ``HH:MM:SS`` once it reaches an hour."""
    total_seconds = int(td.total_seconds())
    hours = total_seconds // 3600
    minutes = (total_seconds % 3600) // 60
    seconds = total_seconds % 60
    if hours > 0:
        return f"{hours:02}:{minutes:02}:{seconds:02}"
    return f"{minutes:02}:{seconds:02}"


def parse_vtt_for_chapters(file_path):
    """Extract chapter entries from a VTT file.

    Only cues with at least two caption lines are used: the first line is
    taken as the Chinese name and the last line as the English name. When the
    same Chinese name occurs more than once, the occurrence with the longest
    duration wins. Cue settings after the end timestamp are ignored.

    Returns:
        A list of ``(start, chinese_name, english_name)`` tuples sorted by
        ``start`` (a timedelta).
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        lines = f.readlines()

    entries = []
    i = 0
    while i < len(lines):
        line = lines[i].strip()
        if "-->" in line:
            start_str, end_str = _split_cue_timing(line)
            start = parse_vtt_timestamp(start_str)
            end = parse_vtt_timestamp(end_str)
            duration = end - start

            i += 1
            content = []
            while i < len(lines) and lines[i].strip():
                content.append(lines[i].strip())
                i += 1

            # Expected format: Chinese name on the first line, English name
            # on the last line; single-line cues are not chapters.
            if len(content) >= 2:
                entries.append((content[0], content[-1], start, duration))
        else:
            i += 1

    # Deduplicate: keep only the longest-duration entry per Chinese name
    chapter_dict = {}
    for cname, ename, start, duration in entries:
        if cname not in chapter_dict or duration > chapter_dict[cname][1]:
            chapter_dict[cname] = (ename, duration, start)

    # Sort by start time
    return sorted(
        [(data[2], cname, data[0]) for cname, data in chapter_dict.items()],
        key=lambda chapter: chapter[0],
    )


def write_youtube_chapters(chapters, output_path):
    """Write chapters as ``<time> <chinese name> <english name>`` lines."""
    with open(output_path, 'w', encoding='utf-8') as f:
        for start, cname, ename in chapters:
            time_str = format_youtube_time(start)
            f.write(f"{time_str} {cname} {ename}\n")


def _convert_srt_to_vtt(srt_path, vtt_path):
    """Convert an SRT file to VTT with ffmpeg, warning on a non-zero exit."""
    result = subprocess.run(['ffmpeg', '-i', srt_path, vtt_path])
    if result.returncode != 0:
        print(f"warning: ffmpeg exited with code {result.returncode} for {srt_path}",
              file=sys.stderr)


_USAGE = """usage:
  python main.py combine <1.vtt> <2.vtt>
  python main.py style <in.vtt> <style>
  python main.py youtube <in.vtt>
  python main.py srt2yt <1.srt> <2.srt>"""

# Number of positional arguments each command needs after the command name.
_REQUIRED_ARGS = {'combine': 2, 'style': 2, 'youtube': 1, 'srt2yt': 2}


def main(argv):
    """Dispatch the command named in ``argv[1]``; return a process exit code."""
    func = argv[1] if len(argv) > 1 else None
    args = argv[2:]
    if func not in _REQUIRED_ARGS or len(args) < _REQUIRED_ARGS[func]:
        print(_USAGE, file=sys.stderr)
        return 1

    if func == 'combine':
        # combine two vtt files
        input_file_1, input_file_2 = args[0], args[1]
        combine_two_vtt(input_file_1, input_file_2,
                        _with_suffix(input_file_1, '.combined.vtt'))
    elif func == 'style':
        # example styles:
        #   'position:100% align:right size:50%'
        #   'position:0% align:left size:50%'
        input_file, style = args[0], args[1]
        add_style_to_vtt(input_file, _with_suffix(input_file, '.style.vtt'), style)
    elif func == 'youtube':
        input_file = args[0]
        chapters = parse_vtt_for_chapters(input_file)
        write_youtube_chapters(chapters, _with_suffix(input_file, '.youtube.txt'))
    elif func == 'srt2yt':
        input_file_1, input_file_2 = args[0], args[1]
        vtt1 = _with_suffix(input_file_1, '.vtt')
        vtt2 = _with_suffix(input_file_2, '.vtt')
        _convert_srt_to_vtt(input_file_1, vtt1)
        _convert_srt_to_vtt(input_file_2, vtt2)
        # wait for user to input anything to continue
        input("Press enter to continue...")
        combined_file = _with_suffix(input_file_1, '.combined.vtt')
        combine_two_vtt(vtt1, vtt2, combined_file)
        chapters = parse_vtt_for_chapters(combined_file)
        write_youtube_chapters(chapters, _with_suffix(input_file_1, '.youtube.txt'))
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
腳本中還有一些 Youtube 支持的 vtt 格式字幕樣式的嘗試。