From 3dab48aa37734132883c87959269fddbc7a1ed6d Mon Sep 17 00:00:00 2001 From: Markus Schmidt Date: Mon, 11 Oct 2021 16:33:52 +0200 Subject: [PATCH] Added: functionality to create single and multiline webvtt --- controllers/default.py | 50 +++++++++++++--- languages/de.py | 3 + models/db.py | 3 +- modules/transcription_tools/__init__.py | 78 ++++++++++++++++++++++--- views/default/index.html | 8 ++- 5 files changed, 125 insertions(+), 17 deletions(-) diff --git a/controllers/default.py b/controllers/default.py index 8436a4c..1383928 100644 --- a/controllers/default.py +++ b/controllers/default.py @@ -4,24 +4,60 @@ # this file is released under public domain and you can use without limitations # ------------------------------------------------------------------------- +import io #from transcription_tools import create_vtt transcription_tools = local_import('transcription_tools', reload=True) +# app_folder = '' +# video_path = 'applications/transcription/uploads/media_file.file.bc22f2543688e775.aW1hZ2VmaWxtX2V0ZWFjaGluZ29yZy5tcDQ=.mp4' +model = 'private/model' + + # ---- example index page ---- def index(): media_files = db().select(db.media_file.ALL, orderby=db.media_file.title) return dict(media_files=media_files) + @auth.requires_membership('manager') -def manage(): +def manage(): grid = SQLFORM.smartgrid(db.media_file, linked_tables=['post']) return dict(grid=grid) -def vtt(): - video_path = '/home/mschmidt/Videos/100-Meinungen-Video-erstellen.mp4' - model_path = 'applications/transcription/private/model' - return dict(message=transcription_tools.create_vtt(model_path, video_path)) - + +def webvtt_single_line(): + media_file = db.media_file(request.args(0, cast=int)) or redirect(URL('index')) + media_path = '{}/{}/{}'.format(request.folder, 'uploads', media_file.file) + model_path = '{}/{}'.format(request.folder, model) + transkription = transcription_tools.vtt_single_line(model_path, media_path) + db(db.media_file.id == media_file.id).update(vtt_single_line=transkription) + redirect(request.env.http_referer) + +def webvtt(): + media_file = db.media_file(request.args(0, cast=int)) or redirect(URL('index')) + media_path = '{}/{}/{}'.format(request.folder, 'uploads', media_file.file) + model_path = '{}/{}'.format(request.folder, model) + transkription = transcription_tools.vtt(model_path, media_path) + db(db.media_file.id == media_file.id).update(vtt=transkription) + redirect(request.env.http_referer) + + +def download_webvtt_single_line(): + media_file = db.media_file(request.args(0, cast=int)) or redirect(URL('index')) + webvtt = media_file.vtt_single_line + response.headers['Content-Type']='text/vtt' + response.headers['Content-Disposition']='attachment; filename=transcript.vtt' + f = io.StringIO(webvtt) + return(f) + +def download_webvtt(): + media_file = db.media_file(request.args(0, cast=int)) or redirect(URL('index')) + webvtt = media_file.vtt + response.headers['Content-Type']='text/vtt' + response.headers['Content-Disposition']='attachment; filename=transcript.vtt' + f = io.StringIO(webvtt) + return(f) + def user(): - return dict(form=auth()) \ No newline at end of file + return dict(form=auth()) diff --git a/languages/de.py b/languages/de.py index 371ec5e..69f0690 100644 --- a/languages/de.py +++ b/languages/de.py @@ -188,6 +188,7 @@ 'No databases in this application': 'Keine Datenbank in dieser Anwendung', 'No records found': 'No records found', 'Not Authorized': 'Zugriff verweigert', +'not authorized': 'not authorized', 'not in': 'not in', 'Number of entries: **%s**': 'Nummer der Einträge: **%s**', 'Object or table name': 'Objekt oder Tabellenname', @@ -305,6 +306,8 @@ 'Videos': 'Videos', 'View': 'Ansicht', 'View %(entity)s': 'View %(entity)s', +'Vtt': 'Vtt', +'Vtt Single Line': 'Vtt Single Line', 'Vtt Url': 'Vtt Url', 'Welcome %(username)s! Click on the link %(link)s to verify your email': 'Willkommen %(username)s! Klicken Sie auf den Link %(link)s, um Ihre Email zu bestätigen', 'Welcome to web2py!': 'Willkommen bei web2py!', diff --git a/models/db.py b/models/db.py index 96b0692..859f81a 100644 --- a/models/db.py +++ b/models/db.py @@ -157,7 +157,8 @@ db = DAL("sqlite://storage.sqlite") db.define_table('media_file', Field('title', unique=True), - Field('vtt_url'), + Field('vtt_single_line'), + Field('vtt'), Field('file', 'upload', autodelete=True), format = '%(title)s') diff --git a/modules/transcription_tools/__init__.py b/modules/transcription_tools/__init__.py index ccca102..381402b 100644 --- a/modules/transcription_tools/__init__.py +++ b/modules/transcription_tools/__init__.py @@ -15,6 +15,7 @@ from vosk import Model, KaldiRecognizer, SetLogLevel +from webvtt import WebVTT, Caption import sys import os import wave @@ -22,17 +23,20 @@ import subprocess import srt import json import datetime +import textwrap -def create_vtt(model_path, video_path): +def vtt_single_line(model_path, media_path): sample_rate = 16000 model = Model(model_path) rec = KaldiRecognizer(model, sample_rate) rec.SetWords(True) + # 16bit mono with ffmpeg process = subprocess.Popen(['ffmpeg', '-loglevel', 'quiet', '-i', - video_path, - '-ar', str(sample_rate) , '-ac', '1', '-f', 's16le', '-'], + media_path, + '-ar', str(sample_rate), + '-ac', '1', '-f', 's16le', '-'], stdout=subprocess.PIPE) WORDS_PER_LINE = 7 @@ -54,13 +58,71 @@ def create_vtt(model_path, video_path): continue words = jres['result'] for j in range(0, len(words), WORDS_PER_LINE): - line = words[j : j + WORDS_PER_LINE] - s = srt.Subtitle(index=len(subs), + line = words[j: j + WORDS_PER_LINE] + s = srt.Subtitle( + index=len(subs), content=" ".join([l['word'] for l in line]), - start=datetime.timedelta(seconds=line[0]['start']), - end=datetime.timedelta(seconds=line[-1]['end'])) + start=datetime.timedelta(seconds=line[0]['start']), + end=datetime.timedelta(seconds=line[-1]['end']) + ) subs.append(s) return subs - return (srt.compose(transcribe())) + srt_str = srt.compose(transcribe()) # create srt string + # webvtt from srt with ffmepg + process1 = subprocess.Popen( + ['ffmpeg', '-loglevel', 'quiet', '-i', '-', '-f', 'webvtt', '-'], + stdin=subprocess.PIPE, stdout=subprocess.PIPE + ) + + webvtt = process1.communicate(input=bytes(srt_str, 'utf-8'))[0] + + return (webvtt) + + +def vtt(model_path, media_path): + sample_rate = 16000 + model = Model(model_path) + rec = KaldiRecognizer(model, sample_rate) + rec.SetWords(True) + + WORDS_PER_LINE = 7 + + def timeString(seconds): + minutes = seconds / 60 + seconds = seconds % 60 + hours = int(minutes / 60) + minutes = int(minutes % 60) + return '%i:%02i:%06.3f' % (hours, minutes, seconds) + + def transcribe(): + command = ['ffmpeg', '-nostdin', '-loglevel', 'quiet', '-i', media_path, + '-ar', str(sample_rate), '-ac', '1', '-f', 's16le', '-'] + process = subprocess.Popen(command, stdout=subprocess.PIPE) + + results = [] + while True: + data = process.stdout.read(4000) + if len(data) == 0: + break + if rec.AcceptWaveform(data): + results.append(rec.Result()) + results.append(rec.FinalResult()) + + vtt = WebVTT() + for i, res in enumerate(results): + words = json.loads(res).get('result') + if not words: + continue + + start = timeString(words[0]['start']) + end = timeString(words[-1]['end']) + content = ' '.join([w['word'] for w in words]) + + caption = Caption(start, end, textwrap.fill(content)) + vtt.captions.append(caption) + + return(vtt.content) + + return(transcribe()) diff --git a/views/default/index.html b/views/default/index.html index 845d213..fe0a618 100644 --- a/views/default/index.html +++ b/views/default/index.html @@ -5,7 +5,13 @@

Mediendateien