{ "cells": [ { "cell_type": "markdown", "id": "4459c2c1", "metadata": {}, "source": [ "# Transformers - Example of automatic speech recognition\n", "Transformers provides APIs and tools to easily download and train state-of-the-art pretrained models. \n", "Credits: Hugging Face documentation and examples." ] }, { "cell_type": "code", "execution_count": 1, "id": "eee57435", "metadata": {}, "outputs": [], "source": [ "from transformers import pipeline" ] }, { "cell_type": "code", "execution_count": 2, "id": "52c02f74", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "d73869ec68ce4389963d968c2bc0b017", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Downloading (…)lve/main/config.json: 0%| | 0.00/1.97k [00:00\n", " \n", " Your browser does not support the audio element.\n", " \n", " " ], "text/plain": [ "" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Play the audio file\n", "import IPython\n", "IPython.display.Audio(url=speech_url)" ] }, { "cell_type": "code", "execution_count": null, "id": "96a49da5", "metadata": {}, "outputs": [], "source": [ "# Install ffmpeg" ] }, { "cell_type": "code", "execution_count": null, "id": "c578e043", "metadata": {}, "outputs": [], "source": [ "%%bash\n", "wget -O ffmpeg.tar.xz https://johnvansickle.com/ffmpeg/builds/ffmpeg-git-amd64-static.tar.xz\n", "tar xvf ffmpeg.tar.xz" ] }, { "cell_type": "code", "execution_count": 5, "id": "4c8839a3", "metadata": {}, "outputs": [], "source": [ "import os\n", "path = os.environ['PATH']\n", "os.environ['PATH'] += \":./ffmpeg-git-20230313-amd64-static\" # customize the folder name" ] }, { "cell_type": "code", "execution_count": 6, "id": "82e43a07", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/cvmfs/sft-nightlies.cern.ch/lcg/views/dev4cuda/Mon/x86_64-centos7-gcc11-opt/lib/python3.9/site-packages/transformers/generation/utils.py:1313: UserWarning: 
Using `max_length`'s default (448) to control the generation length. This behaviour is deprecated and will be removed from the config in v5 of Transformers -- we recommend using `max_new_tokens` to control the maximum length of the generation.\n", " warnings.warn(\n" ] }, { "data": { "text/plain": [ "{'text': ' I have a dream that one day this nation will rise up and live out the true meaning of its creed.'}" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# transcribe from speech_url\n", "transcriber(speech_url)" ] }, { "cell_type": "code", "execution_count": null, "id": "60b9965a", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.9" } }, "nbformat": 4, "nbformat_minor": 5 }