{
"cells": [
{
"cell_type": "markdown",
"id": "4459c2c1",
"metadata": {},
"source": [
"# Transformers - Example of automatic speech recognition\n",
"Transformers provides APIs and tools to easily download and train state-of-the-art pretrained models.\n",
"\n",
"Credits: Hugging Face documentation and examples"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "eee57435",
"metadata": {},
"outputs": [],
"source": [
"from transformers import pipeline"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "52c02f74",
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "d73869ec68ce4389963d968c2bc0b017",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading (…)lve/main/config.json: 0%| | 0.00/1.97k [00:00, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "7876afb78def4da887bb2592407fbc21",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading pytorch_model.bin: 0%| | 0.00/967M [00:00, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "614e84af1236445d921c4deff744831d",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading (…)neration_config.json: 0%| | 0.00/3.51k [00:00, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "0d41664ca4264e24b695c7c5a942bd62",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading (…)okenizer_config.json: 0%| | 0.00/842 [00:00, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "9ec43e8f0fc14ebc840fe8899982f3e9",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading (…)olve/main/vocab.json: 0%| | 0.00/1.04M [00:00, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "6ed828c6658b4d03839d020741a4bd8e",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading (…)/main/tokenizer.json: 0%| | 0.00/2.20M [00:00, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "1cc881fa4ddb45c8908f31bc1cf8b158",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading (…)olve/main/merges.txt: 0%| | 0.00/494k [00:00, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "9ded2a1dc6db4c849c0a698e732ace0f",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading (…)main/normalizer.json: 0%| | 0.00/52.7k [00:00, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "66463710f31944a39619817b988c2dec",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading (…)in/added_tokens.json: 0%| | 0.00/2.08k [00:00, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "fc793c7886c849afaecb4c7179d89847",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading (…)cial_tokens_map.json: 0%| | 0.00/2.08k [00:00, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "785180b095414d429e378cfafd379fe3",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading (…)rocessor_config.json: 0%| | 0.00/185k [00:00, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Build the speech-to-text pipeline on the GPU with device index 0\n",
"transcriber = pipeline(\n",
"    task=\"automatic-speech-recognition\",\n",
"    model=\"openai/whisper-small\",\n",
"    device=0,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "96c282d8",
"metadata": {},
"outputs": [],
"source": [
"# Sample FLAC recording used as input for the player and the transcriber\n",
"speech_url = \"https://huggingface.co/datasets/Narsil/asr_dummy/resolve/main/mlk.flac\""
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "77ed6cd6",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
" \n",
" "
],
"text/plain": [
""
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Embed an audio player for the sample recording\n",
"from IPython.display import Audio\n",
"\n",
"Audio(url=speech_url)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "96a49da5",
"metadata": {},
"outputs": [],
"source": [
"# Install ffmpeg (the speech-recognition pipeline uses it to decode the audio file)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c578e043",
"metadata": {},
"outputs": [],
"source": [
"%%bash\n",
"# Fetch the static amd64 ffmpeg git build and unpack it into the working directory\n",
"FFMPEG_URL=\"https://johnvansickle.com/ffmpeg/builds/ffmpeg-git-amd64-static.tar.xz\"\n",
"wget -O ffmpeg.tar.xz \"$FFMPEG_URL\"\n",
"tar -xvf ffmpeg.tar.xz"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "4c8839a3",
"metadata": {},
"outputs": [],
"source": [
"import glob\n",
"import os\n",
"\n",
"# The static build unpacks into a dated folder (ffmpeg-git-<date>-amd64-static),\n",
"# so look it up by pattern instead of hard-coding one release date.\n",
"ffmpeg_dirs = sorted(d for d in glob.glob(\"ffmpeg-git-*-amd64-static\") if os.path.isdir(d))\n",
"\n",
"if ffmpeg_dirs:\n",
"    # Dated names sort chronologically, so the last entry is the newest build.\n",
"    # Use an absolute path so the entry stays valid if the working directory changes.\n",
"    ffmpeg_dir = os.path.abspath(ffmpeg_dirs[-1])\n",
"    # Append only once so re-running the cell does not keep growing PATH.\n",
"    if ffmpeg_dir not in os.environ[\"PATH\"].split(os.pathsep):\n",
"        os.environ[\"PATH\"] += os.pathsep + ffmpeg_dir\n",
"else:\n",
"    print(\"No extracted ffmpeg build found in the working directory; relying on an ffmpeg already on PATH\")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "82e43a07",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/cvmfs/sft-nightlies.cern.ch/lcg/views/dev4cuda/Mon/x86_64-centos7-gcc11-opt/lib/python3.9/site-packages/transformers/generation/utils.py:1313: UserWarning: Using `max_length`'s default (448) to control the generation length. This behaviour is deprecated and will be removed from the config in v5 of Transformers -- we recommend using `max_new_tokens` to control the maximum length of the generation.\n",
" warnings.warn(\n"
]
},
{
"data": {
"text/plain": [
"{'text': ' I have a dream that one day this nation will rise up and live out the true meaning of its creed.'}"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Run speech recognition on the recording at speech_url and show the result\n",
"transcription = transcriber(speech_url)\n",
"transcription"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "60b9965a",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.9"
}
},
"nbformat": 4,
"nbformat_minor": 5
}