Asynchronous Google long-running speech recognition with polling
import io
import os
import time

# Imports the Google Cloud client library
from google.cloud import speech
from google.cloud.speech import enums
from google.cloud.speech import types


def transcribe_gcs(gcs_uri, language_code='en-US', poll_interval=10,
                   max_retries=1000000):
    """Asynchronously transcribe the audio file specified by ``gcs_uri``.

    Starts a long-running recognition operation, polls it until it reports
    completion (or the retry budget is used up), then prints the transcript
    and confidence of the top alternative of every result.

    Args:
        gcs_uri: Google Cloud Storage URI of the audio file, e.g.
            ``gs://bucket/object``.
        language_code: Language code placed in the recognition config.
        poll_interval: Seconds to sleep between two status checks.
        max_retries: Maximum number of sleep-and-recheck rounds performed by
            the polling loop.

    Returns:
        The response object produced by ``operation.result()``, so callers
        can reuse the transcript instead of re-parsing printed output.
    """
    client = speech.SpeechClient()
    audio = types.RecognitionAudio(uri=gcs_uri)

    # Encoding and sample rate are intentionally left unset; the explicit
    # form would be, for example:
    #     encoding=enums.RecognitionConfig.AudioEncoding.FLAC,
    #     sample_rate_hertz=16000,
    # NOTE(review): presumably the service reads both from the file header,
    # which only works for self-describing formats -- confirm for your input.
    config = types.RecognitionConfig(language_code=language_code)

    operation = client.long_running_recognize(config, audio)

    print('Waiting for operation to complete...')
    retries_left = max_retries
    # Ask for the status once per round and reuse the answer for both the
    # loop condition and the report below.
    # NOTE(review): done() presumably issues a status request to the
    # service on each call -- confirm against google-api-core.
    done = operation.done()
    while retries_left and not done:
        retries_left -= 1
        print("transcription not complete .. will try after {} secs".format(
            poll_interval))
        time.sleep(poll_interval)
        done = operation.done()

    # Final status and the unused part of the retry budget.
    print(done)
    print(retries_left)

    # NOTE(review): result() is called even when the retry budget ran out;
    # it presumably blocks until the operation finishes or its own timeout
    # elapses -- confirm against google-api-core.
    response = operation.result()

    # Each result is for a consecutive portion of the audio. Iterate through
    # them to get the transcripts for the entire audio file.
    for result in response.results:
        if not result.alternatives:
            # Nothing recognised for this portion; indexing [0] would raise.
            continue
        # The first alternative is the most likely one for this portion.
        best = result.alternatives[0]
        print(u'Transcript: {}'.format(best.transcript))
        print('Confidence: {}'.format(best.confidence))

    return response


if __name__ == "__main__":
    # Time one end-to-end transcription when run as a script; importing the
    # module no longer triggers a request.
    start_time = time.time()
    transcribe_gcs("gs://__________")  # Google Cloud Storage URI
    total_time = time.time() - start_time
    print("seconds " + str(total_time))
    print("minute " + str(total_time / 60))
Notes:
A 30-minute video took 483.78773856163025 seconds (8.063128976027171 minutes) to complete, whereas autosub completed it in 3.5 minutes.
Synchronous Google speech recognition timing data:
1. A 20-second audio file took 9.77 seconds.
2. A 34-second audio file took 14 seconds.
3. With long-running recognition, a 1 min 30 sec video took 28.8 seconds.
Comments
Post a Comment