#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Convert SubRip (.srt) subtitle files to Advanced Sub Station Alpha (.ass)."""

import codecs
import argparse
import sys
import os
import glob

# Use this list to delete automatically subs containing one of the following
# words. All in lower case.
# NOTE(review): the reviewed copy of this file was damaged right after
# "sync by": at least one further entry (it started with "<") was lost and
# must be restored from version control. An empty string must never be put
# here; `SubRipReader.exclude` ignores empty entries for that reason.
exclusion = ["subtitles by", "sync by"]

# NOTE(review): the damaged span also swallowed the definitions of `Console`
# (used below through `Console.v`, `Console.w`, `Console.l`, `Console.nl` and
# `Console.verbose`), `SubElement` (used below through `SubElement(0, 0, '')`,
# `.timeStart`, `.timeEnd`, `.text`, `.appendLine`) and the beginning of the
# writer class. They are deliberately NOT re-invented here; restore them from
# version control before running this module.


class AdvancedSubStationAlphaWriter:
    """Writer for the .ass output (only partially visible in this review).

    NOTE(review): the class statement, the constructor (called below as
    ``AdvancedSubStationAlphaWriter(ass, elements, fregular, fsize)``) and the
    head of the method producing one ``Dialogue:`` line were lost. Attaching
    `getTime` and `getHeader` to this class is inferred from their use of
    ``self.fontRegular`` / ``self.fontSize`` -- confirm against upstream.
    """

    # Surviving tail of the method that formats one "Dialogue:" event line,
    # kept verbatim so that nothing is lost (its `def` line and its first
    # statements are missing; the first `replace` arguments were stripped):
    #
    #     ... "{\\i1}").replace("", "{\\i0}")
    #     # Fixes the subtitle
    #     if text.find("{\\i1}") != -1 and text.find("{\\i0}") == -1:
    #         text += "{\\i0}"
    #     return "Dialogue: 0,{0},{1},{2},,0000,0000,0000,,{3}\n".format(start, end, font, text)

    def getTime(self, t):
        """Format a duration in milliseconds as ``H:MM:SS.CC``.

        :param t: time in milliseconds.
        :return: the formatted timestamp; the milliseconds are truncated to
            hundredths of a second.
        """
        # Floor division keeps every field an integer on Python 3 as well;
        # the original `/` produced floats there (e.g. "1.0:2.0:3.0.45.6").
        seconds, ms = divmod(int(t), 1000)
        minutes, s = divmod(seconds, 60)
        h, m = divmod(minutes, 60)
        return '{0:01}:{1:02}:{2:02}.{3:02}'.format(h, m, s, ms // 10)

    def getHeader(self):
        """Return the text placed before the dialogue lines of the .ass file.

        It contains the ``[Script Info]`` section, a single ``Regular`` style
        built from ``self.fontRegular`` and ``self.fontSize``, and the
        ``Format:`` line of the ``[Events]`` section.
        """
        # `format` (instead of `+`) also accepts a numeric font size.
        return (
            "[Script Info]\n"
            "ScriptType: V4.00+\n"
            "\n"
            "[V4+ Styles]\n"
            "Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding\n"
            "Style: Regular,{0},{1},&H00FFFFFF,&H0000FFFF,&H00000000,&H00000000,-1,0,0,0,100,100,0,0,1,1,1,2,20,20,20,0\n"
            "\n"
            "[Events]\n"
            "Format: Layer, Start, End, Style, Actor, MarginL, MarginR, MarginV, Effect, Text\n"
        ).format(self.fontRegular, self.fontSize)
        # Disabled in the original (an extra italic style line):
        #+ "Style: Italic," + self.fontItalic + "," + self.fontSize + ",&H00FFFFFF,&H0000FFFF,&H00000000,&H00000000,-1,-1,0,0,100,100,0,0,1,1,1,2,20,20,20,0\n" \


class SubRipReader:
    """Read a SubRip file and expose its subtitles in ``self.elements``."""

    # Byte-order marks and the codec each one selects. The UTF-32 marks MUST
    # be tested before the UTF-16 ones: BOM_UTF32_LE starts with the two
    # bytes of BOM_UTF16_LE, so the opposite order misdetects UTF-32-LE.
    _BOMS = (
        (codecs.BOM_UTF8, 'utf-8'),
        (codecs.BOM_UTF32_LE, 'utf-32-le'),
        (codecs.BOM_UTF32_BE, 'utf-32-be'),
        (codecs.BOM_UTF16_LE, 'utf-16-le'),
        (codecs.BOM_UTF16_BE, 'utf-16-be'),
    )

    def __init__(self, filename):
        """Detect the encoding of *filename*, read it and parse it."""
        Console.v('Reading "' + filename + '"')
        self.filename = filename
        self.bomLength = 0
        self.encoding = 'utf-8'
        self.lines = []
        self.elements = []
        self.detectBomAndEncoding()
        self.read()
        self.parse()

    def detectBomAndEncoding(self):
        """Set ``self.encoding`` / ``self.bomLength`` from the file's BOM.

        Without a BOM the defaults chosen by the constructor (utf-8, length
        0) are left untouched.
        """
        # Binary mode: the BOM constants are byte strings.
        with open(self.filename, 'rb') as f:
            begin = f.read(4)

        for bom, encoding in self._BOMS:
            if begin.startswith(bom):
                self.encoding = encoding
                self.bomLength = len(bom)
                break

        if self.bomLength > 0:
            Console.v('BOM detected! Encoding: ' + self.encoding)
        else:
            Console.v('No BOM found. Using utf-8 as default encoding.')

    def read(self):
        """Load the lines, falling back to iso-8859-1 if decoding fails."""
        if self.read2():
            return
        Console.v('Fail to read the file with ' + self.encoding + '. Trying iso-8859-1.')
        self.encoding = 'iso-8859-1'
        if not self.read2():
            # Previously silent: the conversion went on with no line at all.
            Console.w('Unable to read "' + self.filename + '".')

    def read2(self):
        """Read the file into ``self.lines`` using ``self.encoding``.

        :return: True on success, False when the file cannot be opened or
            decoded.
        """
        try:
            with codecs.open(self.filename, 'r', self.encoding) as f:
                self.lines = f.readlines()
            if self.lines and self.bomLength > 0:
                # Round-trip through utf-8 to drop the decoded BOM character.
                self.lines[0] = self.lines[0].encode('utf-8').decode('utf-8-sig')
        except (IOError, OSError, UnicodeError):
            return False
        return True

    @staticmethod
    def _toNative(line):
        """Return *line* as the interpreter's native ``str`` type.

        On Python 2 the decoded ``unicode`` line is encoded to a utf-8 byte
        string (what the original code did unconditionally); on Python 3 the
        line already is a ``str`` and is returned unchanged.
        """
        if isinstance(line, str):
            return line
        return line.encode('utf-8')

    def parse(self):
        """Turn ``self.lines`` into `SubElement` objects in ``self.elements``.

        A subtitle is made of a counter line, a ``start --> end`` line and
        text lines. Subtitles whose text matches `exclude` are dropped, and
        subtitles left without text are only reported.
        """
        el = None
        isJustNew = False       # the previous line was a counter
        nextCountReady = True   # a digits-only line may be a counter
        count = 0
        emptySub = []
        skip = False

        for line in self.lines:
            l = self._toNative(line) \
                .replace("\n", "") \
                .replace("\r", "")

            if isJustNew:
                # The line following a counter carries the timings.
                skip = False
                isJustNew = False
                nextCountReady = False
                times = l.split(' --> ')
                el.timeStart = self.parseTime(times[0])
                el.timeEnd = self.parseTime(times[1])
            elif nextCountReady and l.isdigit():
                count += 1
                Console.v("Reading SRT #" + str(l) + "\r", False)
                if int(l) != count:
                    Console.nl()
                    Console.w("Bad SRT number! Found: #" + str(l) + ", but should be #" + str(count))
                # Flush the previous subtitle before starting a new one.
                if el is not None:
                    if el.text == '':
                        emptySub.append(count - 1)
                    else:
                        self.elements.append(el)
                el = SubElement(0, 0, '')
                isJustNew = True
            else:
                # NOTE(review): the source lost its indentation; the nesting
                # of this branch was inferred -- confirm against upstream.
                if not skip:
                    skip = self.exclude(l)
                    if skip:
                        Console.nl()
                        Console.w("Skipping SRT '" + l + "'")
                    elif len(l) > 0:
                        # NOTE(review): the first five pairs look identical
                        # as extracted (no-ops); presumably their first
                        # argument used to be a mis-decoded sequence.
                        l = l.replace("œ", "œ") \
                            .replace("’", "’") \
                            .replace("“", "“") \
                            .replace("”", "”") \
                            .replace("€", "€") \
                            .replace("´´", '"') \
                            .replace("´", "'") \
                            .replace("....", "…") \
                            .replace("...", "…") \
                            .replace(". . .", "…")
                        l = self.applyLanguageOnLine(l)
                        el.appendLine(l)
                # Only the line right after the timings is protected from
                # being read as a counter.
                nextCountReady = True

        if el is not None and len(str(el)) > 0:
            self.elements.append(el)

        Console.v('Parsing complete: ' + str(len(self.elements)) + ' subtitles found!')
        Console.v(str(len(emptySub)) + ' subtitles were empty: ' + str(emptySub))

    def parseTime(self, time):
        """Convert an ``HH:MM:SS,mmm`` timestamp to milliseconds.

        A dot is accepted as well as a comma before the milliseconds.

        :raises ValueError: when a field is not an integer.
        :raises IndexError: when a field is missing.
        """
        r1 = time.split(":")
        h = int(r1[0])
        m = int(r1[1])
        r2 = r1[2].replace('.', ',').split(',')
        s = int(r2[0])
        ms = int(r2[1])
        return ((h * 60 + m) * 60 + s) * 1000 + ms

    def applyLanguageOnLine(self, line):
        """Hook for language specific fixes; returns *line* unchanged."""
        return line

    def exclude(self, line):
        """Tell whether *line* contains one of the `exclusion` words.

        The comparison is made on the lower-cased line. Empty entries are
        ignored, as they would match every line.
        """
        l = line.lower()
        return any(x and x in l for x in exclusion)


class Srt2Ass:
    """Convert one SubRip file; all the work happens in the constructor."""

    def __init__(self, srt, fregular, fsize, delete):
        """Convert *srt* and optionally remove it.

        :param srt: path of the SubRip file.
        :param fregular: font name for the regular style.
        :param fsize: font size.
        :param delete: when true, *srt* is removed after the conversion.
        """
        ass = self._assPath(srt)
        subs = SubRipReader(srt)
        AdvancedSubStationAlphaWriter(ass, subs.elements, fregular, fsize)
        if delete:
            Console.l("Removing " + srt + "…")
            os.remove(srt)

    @staticmethod
    def _assPath(srt):
        """Return the output path for *srt*, never equal to *srt* itself.

        Only the final extension is swapped. The original
        ``srt.replace('.srt', '.ass')`` also rewrote directory names and
        returned the input path itself when it contained no ``.srt``.
        """
        root, ext = os.path.splitext(srt)
        if ext.lower() == '.srt':
            return root + '.ass'
        return srt + '.ass'


def main():
    """Parse the command line and convert every selected SubRip file."""
    parser = argparse.ArgumentParser(description='Convert SubRip (srt) files to Advanced Sub Station Alpha (ass) files.')
    parser.add_argument('file', metavar='file', nargs='*', help='the SubRip (srt) file')
    parser.add_argument('--font-regular', metavar='fontr', nargs='?', help='Name of the font to use for regular text. Default is "Cronos Pro".', default="Cronos Pro")
    parser.add_argument('--font-size', metavar='fonts', nargs='?', help='Font\'s size for both regular and italic fonts. Default is 24.', default="24")
    parser.add_argument('--delete', '-d', action='store_true', help='delete the original SubRip (srt) file')
    parser.add_argument('--verbose', '-v', action='store_true', help='print information about the process')
    args = parser.parse_args()

    Console.verbose = args.verbose

    if not args.file:
        # No argument: every SubRip file of the current directory.
        srtfiles = glob.glob("*.srt")
    elif len(args.file) == 1:
        # A single argument may be a pattern the shell did not expand.
        pattern = args.file[0]
        srtfiles = glob.glob(pattern)
        if not srtfiles and os.path.exists(pattern):
            # Literal name containing glob metacharacters such as "[".
            srtfiles = [pattern]
    else:
        srtfiles = args.file

    for srt in srtfiles:
        Srt2Ass(srt, args.font_regular, args.font_size, args.delete)


if __name__ == "__main__":
    main()