diff --git a/architects_of_future/FineTuned/RAG.ipynb b/architects_of_future/FineTuned/RAG.ipynb new file mode 100644 index 00000000..91072b08 --- /dev/null +++ b/architects_of_future/FineTuned/RAG.ipynb @@ -0,0 +1,1503 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "markdown", + "source": [ + "### Information Retrieval - LangChain\n", + "- Without OpenAI Embeddings\n", + "- Without OpenAI LLM" + ], + "metadata": { + "id": "P6xh7KK_e5l9" + } + }, + { + "cell_type": "markdown", + "source": [ + "Two Applications:\n", + "- Text Documents\n", + "- Multiple PDF Files" + ], + "metadata": { + "id": "3HbUlsp3fS5p" + } + },
+ { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ZEZmjGQGW1ue", + "outputId": "2ee5d8cd-1826-4bb8-8fc9-be5eb3038062" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", + "Requirement already satisfied: langchain in /usr/local/lib/python3.9/dist-packages (0.0.142)\n", + "Requirement already satisfied: huggingface_hub in /usr/local/lib/python3.9/dist-packages (0.13.4)\n", + "Requirement already satisfied: sentence_transformers in /usr/local/lib/python3.9/dist-packages (2.2.2)\n" + ] + } + ],
+ "source": [ + "!pip install langchain\n", + "!pip install huggingface_hub\n", + "!pip install sentence_transformers" + ] + }, + { + "cell_type": "markdown", + "source": [ + "### Set HUGGINGFACEHUB_API_TOKEN" + ], + "metadata": { + "id": "Ra7uy0RhXd_2" + } + },
+ { + "cell_type": "code", + "source": [ + "import os\n", + "\n", + "# Placeholder: replace with your own Hugging Face Hub API token\n", + "os.environ[\"HUGGINGFACEHUB_API_TOKEN\"] = \"HUGGINGFACEHUB_API_TOKEN\"" + ], + "metadata": { + "id": "8AZVtuZiW9Il" + }, + "execution_count": null, + "outputs": [] + },
+ { + "cell_type": "markdown", + "source": [ + "### Download Text File" + ], + "metadata": { + "id": "hALLW9fyXWqu" + } + }, + { + "cell_type": "code", + "source": [ + "import requests\n", + "\n", + "url = \"https://raw.githubusercontent.com/hwchase17/langchain/master/docs/modules/state_of_the_union.txt\"\n", + "res = requests.get(url)\n", + "with open(\"state_of_the_union.txt\", \"w\") as f:\n", + " f.write(res.text)" + ], + "metadata": { + "id": "j9lr1BOMXTVr" + }, + "execution_count": null, + "outputs": [] + },
+ { + "cell_type": "code", + "source": [ + "# Document Loader\n", + "from langchain.document_loaders import TextLoader\n", + "loader = TextLoader('./state_of_the_union.txt')\n", + "documents = loader.load()" + ], + "metadata": { + "id": "sVio4mK5XVwR" + }, + "execution_count": null, + "outputs": [] + },
+ { + "cell_type": "code", + "source": [ + "documents" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Du41v2SOnXtG", + "outputId": "ca1ecbda-eeba-4078-a34d-73704448a9ee" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[Document(page_content='Madam Speaker, Madam Vice President, our First Lady and Second 
Gentleman. Members of Congress and the Cabinet. Justices of the Supreme Court. My fellow Americans. \\n\\nLast year COVID-19 kept us apart. This year we are finally together again. \\n\\nTonight, we meet as Democrats Republicans and Independents. But most importantly as Americans. \\n\\nWith a duty to one another to the American people to the Constitution. \\n\\nAnd with an unwavering resolve that freedom will always triumph over tyranny. \\n\\nSix days ago, Russia’s Vladimir Putin sought to shake the foundations of the free world thinking he could make it bend to his menacing ways. But he badly miscalculated. \\n\\nHe thought he could roll into Ukraine and the world would roll over. Instead he met a wall of strength he never imagined. \\n\\nHe met the Ukrainian people. \\n\\nFrom President Zelenskyy to every Ukrainian, their fearlessness, their courage, their determination, inspires the world. \\n\\nGroups of citizens blocking tanks with their bodies. Everyone from students to retirees teachers turned soldiers defending their homeland. \\n\\nIn this struggle as President Zelenskyy said in his speech to the European Parliament “Light will win over darkness.” The Ukrainian Ambassador to the United States is here tonight. \\n\\nLet each of us here tonight in this Chamber send an unmistakable signal to Ukraine and to the world. \\n\\nPlease rise if you are able and show that, Yes, we the United States of America stand with the Ukrainian people. \\n\\nThroughout our history we’ve learned this lesson when dictators do not pay a price for their aggression they cause more chaos. \\n\\nThey keep moving. \\n\\nAnd the costs and the threats to America and the world keep rising. \\n\\nThat’s why the NATO Alliance was created to secure peace and stability in Europe after World War 2. \\n\\nThe United States is a member along with 29 other nations. \\n\\nIt matters. American diplomacy matters. American resolve matters. \\n\\nPutin’s latest attack on Ukraine was premeditated and unprovoked. \\n\\nHe rejected repeated efforts at diplomacy. \\n\\nHe thought the West and NATO wouldn’t respond. And he thought he could divide us at home. Putin was wrong. We were ready. Here is what we did. \\n\\nWe prepared extensively and carefully. \\n\\nWe spent months building a coalition of other freedom-loving nations from Europe and the Americas to Asia and Africa to confront Putin. \\n\\nI spent countless hours unifying our European allies. We shared with the world in advance what we knew Putin was planning and precisely how he would try to falsely justify his aggression. \\n\\nWe countered Russia’s lies with truth. \\n\\nAnd now that he has acted the free world is holding him accountable. \\n\\nAlong with twenty-seven members of the European Union including France, Germany, Italy, as well as countries like the United Kingdom, Canada, Japan, Korea, Australia, New Zealand, and many others, even Switzerland. \\n\\nWe are inflicting pain on Russia and supporting the people of Ukraine. Putin is now isolated from the world more than ever. \\n\\nTogether with our allies –we are right now enforcing powerful economic sanctions. \\n\\nWe are cutting off Russia’s largest banks from the international financial system. \\n\\nPreventing Russia’s central bank from defending the Russian Ruble making Putin’s $630 Billion “war fund” worthless. \\n\\nWe are choking off Russia’s access to technology that will sap its economic strength and weaken its military for years to come. 
\\n\\nTonight I say to the Russian oligarchs and corrupt leaders who have bilked billions of dollars off this violent regime no more. \\n\\nThe U.S. Department of Justice is assembling a dedicated task force to go after the crimes of Russian oligarchs. \\n\\nWe are joining with our European allies to find and seize your yachts your luxury apartments your private jets. We are coming for your ill-begotten gains. \\n\\nAnd tonight I am announcing that we will join our allies in closing off American air space to all Russian flights – further isolating Russia – and adding an additional squeeze –on their economy. The Ruble has lost 30% of its value. \\n\\nThe Russian stock market has lost 40% of its value and trading remains suspended. Russia’s economy is reeling and Putin alone is to blame. \\n\\nTogether with our allies we are providing support to the Ukrainians in their fight for freedom. Military assistance. Economic assistance. Humanitarian assistance. \\n\\nWe are giving more than $1 Billion in direct assistance to Ukraine. \\n\\nAnd we will continue to aid the Ukrainian people as they defend their country and to help ease their suffering. \\n\\nLet me be clear, our forces are not engaged and will not engage in conflict with Russian forces in Ukraine. \\n\\nOur forces are not going to Europe to fight in Ukraine, but to defend our NATO Allies – in the event that Putin decides to keep moving west. \\n\\nFor that purpose we’ve mobilized American ground forces, air squadrons, and ship deployments to protect NATO countries including Poland, Romania, Latvia, Lithuania, and Estonia. \\n\\nAs I have made crystal clear the United States and our Allies will defend every inch of territory of NATO countries with the full force of our collective power. \\n\\nAnd we remain clear-eyed. The Ukrainians are fighting back with pure courage. But the next few days weeks, months, will be hard on them. \\n\\nPutin has unleashed violence and chaos. But while he may make gains on the battlefield – he will pay a continuing high price over the long run. \\n\\nAnd a proud Ukrainian people, who have known 30 years of independence, have repeatedly shown that they will not tolerate anyone who tries to take their country backwards. \\n\\nTo all Americans, I will be honest with you, as I’ve always promised. A Russian dictator, invading a foreign country, has costs around the world. \\n\\nAnd I’m taking robust action to make sure the pain of our sanctions is targeted at Russia’s economy. And I will use every tool at our disposal to protect American businesses and consumers. \\n\\nTonight, I can announce that the United States has worked with 30 other countries to release 60 Million barrels of oil from reserves around the world. \\n\\nAmerica will lead that effort, releasing 30 Million barrels from our own Strategic Petroleum Reserve. And we stand ready to do more if necessary, unified with our allies. \\n\\nThese steps will help blunt gas prices here at home. And I know the news about what’s happening can seem alarming. \\n\\nBut I want you to know that we are going to be okay. \\n\\nWhen the history of this era is written Putin’s war on Ukraine will have left Russia weaker and the rest of the world stronger. \\n\\nWhile it shouldn’t have taken something so terrible for people around the world to see what’s at stake now everyone sees it clearly. \\n\\nWe see the unity among leaders of nations and a more unified Europe a more unified West. 
And we see unity among the people who are gathering in cities in large crowds around the world even in Russia to demonstrate their support for Ukraine. \\n\\nIn the battle between democracy and autocracy, democracies are rising to the moment, and the world is clearly choosing the side of peace and security. \\n\\nThis is a real test. It’s going to take time. So let us continue to draw inspiration from the iron will of the Ukrainian people. \\n\\nTo our fellow Ukrainian Americans who forge a deep bond that connects our two nations we stand with you. \\n\\nPutin may circle Kyiv with tanks, but he will never gain the hearts and souls of the Ukrainian people. \\n\\nHe will never extinguish their love of freedom. He will never weaken the resolve of the free world. \\n\\nWe meet tonight in an America that has lived through two of the hardest years this nation has ever faced. \\n\\nThe pandemic has been punishing. \\n\\nAnd so many families are living paycheck to paycheck, struggling to keep up with the rising cost of food, gas, housing, and so much more. \\n\\nI understand. \\n\\nI remember when my Dad had to leave our home in Scranton, Pennsylvania to find work. I grew up in a family where if the price of food went up, you felt it. \\n\\nThat’s why one of the first things I did as President was fight to pass the American Rescue Plan. \\n\\nBecause people were hurting. We needed to act, and we did. \\n\\nFew pieces of legislation have done more in a critical moment in our history to lift us out of crisis. \\n\\nIt fueled our efforts to vaccinate the nation and combat COVID-19. It delivered immediate economic relief for tens of millions of Americans. \\n\\nHelped put food on their table, keep a roof over their heads, and cut the cost of health insurance. \\n\\nAnd as my Dad used to say, it gave people a little breathing room. \\n\\nAnd unlike the $2 Trillion tax cut passed in the previous administration that benefitted the top 1% of Americans, the American Rescue Plan helped working people—and left no one behind. \\n\\nAnd it worked. It created jobs. Lots of jobs. \\n\\nIn fact—our economy created over 6.5 Million new jobs just last year, more jobs created in one year \\nthan ever before in the history of America. \\n\\nOur economy grew at a rate of 5.7% last year, the strongest growth in nearly 40 years, the first step in bringing fundamental change to an economy that hasn’t worked for the working people of this nation for too long. \\n\\nFor the past 40 years we were told that if we gave tax breaks to those at the very top, the benefits would trickle down to everyone else. \\n\\nBut that trickle-down theory led to weaker economic growth, lower wages, bigger deficits, and the widest gap between those at the top and everyone else in nearly a century. \\n\\nVice President Harris and I ran for office with a new economic vision for America. \\n\\nInvest in America. Educate Americans. Grow the workforce. Build the economy from the bottom up \\nand the middle out, not from the top down. \\n\\nBecause we know that when the middle class grows, the poor have a ladder up and the wealthy do very well. \\n\\nAmerica used to have the best roads, bridges, and airports on Earth. \\n\\nNow our infrastructure is ranked 13th in the world. \\n\\nWe won’t be able to compete for the jobs of the 21st Century if we don’t fix that. \\n\\nThat’s why it was so important to pass the Bipartisan Infrastructure Law—the most sweeping investment to rebuild America in history. 
\\n\\nThis was a bipartisan effort, and I want to thank the members of both parties who worked to make it happen. \\n\\nWe’re done talking about infrastructure weeks. \\n\\nWe’re going to have an infrastructure decade. \\n\\nIt is going to transform America and put us on a path to win the economic competition of the 21st Century that we face with the rest of the world—particularly with China. \\n\\nAs I’ve told Xi Jinping, it is never a good bet to bet against the American people. \\n\\nWe’ll create good jobs for millions of Americans, modernizing roads, airports, ports, and waterways all across America. \\n\\nAnd we’ll do it all to withstand the devastating effects of the climate crisis and promote environmental justice. \\n\\nWe’ll build a national network of 500,000 electric vehicle charging stations, begin to replace poisonous lead pipes—so every child—and every American—has clean water to drink at home and at school, provide affordable high-speed internet for every American—urban, suburban, rural, and tribal communities. \\n\\n4,000 projects have already been announced. \\n\\nAnd tonight, I’m announcing that this year we will start fixing over 65,000 miles of highway and 1,500 bridges in disrepair. \\n\\nWhen we use taxpayer dollars to rebuild America – we are going to Buy American: buy American products to support American jobs. \\n\\nThe federal government spends about $600 Billion a year to keep the country safe and secure. \\n\\nThere’s been a law on the books for almost a century \\nto make sure taxpayers’ dollars support American jobs and businesses. \\n\\nEvery Administration says they’ll do it, but we are actually doing it. \\n\\nWe will buy American to make sure everything from the deck of an aircraft carrier to the steel on highway guardrails are made in America. \\n\\nBut to compete for the best jobs of the future, we also need to level the playing field with China and other competitors. \\n\\nThat’s why it is so important to pass the Bipartisan Innovation Act sitting in Congress that will make record investments in emerging technologies and American manufacturing. \\n\\nLet me give you one example of why it’s so important to pass it. \\n\\nIf you travel 20 miles east of Columbus, Ohio, you’ll find 1,000 empty acres of land. \\n\\nIt won’t look like much, but if you stop and look closely, you’ll see a “Field of dreams,” the ground on which America’s future will be built. \\n\\nThis is where Intel, the American company that helped build Silicon Valley, is going to build its $20 billion semiconductor “mega site”. \\n\\nUp to eight state-of-the-art factories in one place. 10,000 new good-paying jobs. \\n\\nSome of the most sophisticated manufacturing in the world to make computer chips the size of a fingertip that power the world and our everyday lives. \\n\\nSmartphones. The Internet. Technology we have yet to invent. \\n\\nBut that’s just the beginning. \\n\\nIntel’s CEO, Pat Gelsinger, who is here tonight, told me they are ready to increase their investment from \\n$20 billion to $100 billion. \\n\\nThat would be one of the biggest investments in manufacturing in American history. \\n\\nAnd all they’re waiting for is for you to pass this bill. \\n\\nSo let’s not wait any longer. Send it to my desk. I’ll sign it. \\n\\nAnd we will really take off. \\n\\nAnd Intel is not alone. \\n\\nThere’s something happening in America. \\n\\nJust look around and you’ll see an amazing story. 
\\n\\nThe rebirth of the pride that comes from stamping products “Made In America.” The revitalization of American manufacturing. \\n\\nCompanies are choosing to build new factories here, when just a few years ago, they would have built them overseas. \\n\\nThat’s what is happening. Ford is investing $11 billion to build electric vehicles, creating 11,000 jobs across the country. \\n\\nGM is making the largest investment in its history—$7 billion to build electric vehicles, creating 4,000 jobs in Michigan. \\n\\nAll told, we created 369,000 new manufacturing jobs in America just last year. \\n\\nPowered by people I’ve met like JoJo Burgess, from generations of union steelworkers from Pittsburgh, who’s here with us tonight. \\n\\nAs Ohio Senator Sherrod Brown says, “It’s time to bury the label “Rust Belt.” \\n\\nIt’s time. \\n\\nBut with all the bright spots in our economy, record job growth and higher wages, too many families are struggling to keep up with the bills. \\n\\nInflation is robbing them of the gains they might otherwise feel. \\n\\nI get it. That’s why my top priority is getting prices under control. \\n\\nLook, our economy roared back faster than most predicted, but the pandemic meant that businesses had a hard time hiring enough workers to keep up production in their factories. \\n\\nThe pandemic also disrupted global supply chains. \\n\\nWhen factories close, it takes longer to make goods and get them from the warehouse to the store, and prices go up. \\n\\nLook at cars. \\n\\nLast year, there weren’t enough semiconductors to make all the cars that people wanted to buy. \\n\\nAnd guess what, prices of automobiles went up. \\n\\nSo—we have a choice. \\n\\nOne way to fight inflation is to drive down wages and make Americans poorer. \\n\\nI have a better plan to fight inflation. \\n\\nLower your costs, not your wages. \\n\\nMake more cars and semiconductors in America. \\n\\nMore infrastructure and innovation in America. \\n\\nMore goods moving faster and cheaper in America. \\n\\nMore jobs where you can earn a good living in America. \\n\\nAnd instead of relying on foreign supply chains, let’s make it in America. \\n\\nEconomists call it “increasing the productive capacity of our economy.” \\n\\nI call it building a better America. \\n\\nMy plan to fight inflation will lower your costs and lower the deficit. \\n\\n17 Nobel laureates in economics say my plan will ease long-term inflationary pressures. Top business leaders and most Americans support my plan. And here’s the plan: \\n\\nFirst – cut the cost of prescription drugs. Just look at insulin. One in ten Americans has diabetes. In Virginia, I met a 13-year-old boy named Joshua Davis. \\n\\nHe and his Dad both have Type 1 diabetes, which means they need insulin every day. Insulin costs about $10 a vial to make. \\n\\nBut drug companies charge families like Joshua and his Dad up to 30 times more. I spoke with Joshua’s mom. \\n\\nImagine what it’s like to look at your child who needs insulin and have no idea how you’re going to pay for it. \\n\\nWhat it does to your dignity, your ability to look your child in the eye, to be the parent you expect to be. \\n\\nJoshua is here with us tonight. Yesterday was his birthday. Happy birthday, buddy. \\n\\nFor Joshua, and for the 200,000 other young people with Type 1 diabetes, let’s cap the cost of insulin at $35 a month so everyone can afford it. \\n\\nDrug companies will still do very well. 
And while we’re at it let Medicare negotiate lower prices for prescription drugs, like the VA already does. \\n\\nLook, the American Rescue Plan is helping millions of families on Affordable Care Act plans save $2,400 a year on their health care premiums. Let’s close the coverage gap and make those savings permanent. \\n\\nSecond – cut energy costs for families an average of $500 a year by combatting climate change. \\n\\nLet’s provide investments and tax credits to weatherize your homes and businesses to be energy efficient and you get a tax credit; double America’s clean energy production in solar, wind, and so much more; lower the price of electric vehicles, saving you another $80 a month because you’ll never have to pay at the gas pump again. \\n\\nThird – cut the cost of child care. Many families pay up to $14,000 a year for child care per child. \\n\\nMiddle-class and working families shouldn’t have to pay more than 7% of their income for care of young children. \\n\\nMy plan will cut the cost in half for most families and help parents, including millions of women, who left the workforce during the pandemic because they couldn’t afford child care, to be able to get back to work. \\n\\nMy plan doesn’t stop there. It also includes home and long-term care. More affordable housing. And Pre-K for every 3- and 4-year-old. \\n\\nAll of these will lower costs. \\n\\nAnd under my plan, nobody earning less than $400,000 a year will pay an additional penny in new taxes. Nobody. \\n\\nThe one thing all Americans agree on is that the tax system is not fair. We have to fix it. \\n\\nI’m not looking to punish anyone. But let’s make sure corporations and the wealthiest Americans start paying their fair share. \\n\\nJust last year, 55 Fortune 500 corporations earned $40 billion in profits and paid zero dollars in federal income tax. \\n\\nThat’s simply not fair. That’s why I’ve proposed a 15% minimum tax rate for corporations. \\n\\nWe got more than 130 countries to agree on a global minimum tax rate so companies can’t get out of paying their taxes at home by shipping jobs and factories overseas. \\n\\nThat’s why I’ve proposed closing loopholes so the very wealthy don’t pay a lower tax rate than a teacher or a firefighter. \\n\\nSo that’s my plan. It will grow the economy and lower costs for families. \\n\\nSo what are we waiting for? Let’s get this done. And while you’re at it, confirm my nominees to the Federal Reserve, which plays a critical role in fighting inflation. \\n\\nMy plan will not only lower costs to give families a fair shot, it will lower the deficit. \\n\\nThe previous Administration not only ballooned the deficit with tax cuts for the very wealthy and corporations, it undermined the watchdogs whose job was to keep pandemic relief funds from being wasted. \\n\\nBut in my administration, the watchdogs have been welcomed back. \\n\\nWe’re going after the criminals who stole billions in relief money meant for small businesses and millions of Americans. \\n\\nAnd tonight, I’m announcing that the Justice Department will name a chief prosecutor for pandemic fraud. \\n\\nBy the end of this year, the deficit will be down to less than half what it was before I took office. \\n\\nThe only president ever to cut the deficit by more than one trillion dollars in a single year. \\n\\nLowering your costs also means demanding more competition. \\n\\nI’m a capitalist, but capitalism without competition isn’t capitalism. \\n\\nIt’s exploitation—and it drives up prices. 
\\n\\nWhen corporations don’t have to compete, their profits go up, your prices go up, and small businesses and family farmers and ranchers go under. \\n\\nWe see it happening with ocean carriers moving goods in and out of America. \\n\\nDuring the pandemic, these foreign-owned companies raised prices by as much as 1,000% and made record profits. \\n\\nTonight, I’m announcing a crackdown on these companies overcharging American businesses and consumers. \\n\\nAnd as Wall Street firms take over more nursing homes, quality in those homes has gone down and costs have gone up. \\n\\nThat ends on my watch. \\n\\nMedicare is going to set higher standards for nursing homes and make sure your loved ones get the care they deserve and expect. \\n\\nWe’ll also cut costs and keep the economy going strong by giving workers a fair shot, provide more training and apprenticeships, hire them based on their skills not degrees. \\n\\nLet’s pass the Paycheck Fairness Act and paid leave. \\n\\nRaise the minimum wage to $15 an hour and extend the Child Tax Credit, so no one has to raise a family in poverty. \\n\\nLet’s increase Pell Grants and increase our historic support of HBCUs, and invest in what Jill—our First Lady who teaches full-time—calls America’s best-kept secret: community colleges. \\n\\nAnd let’s pass the PRO Act when a majority of workers want to form a union—they shouldn’t be stopped. \\n\\nWhen we invest in our workers, when we build the economy from the bottom up and the middle out together, we can do something we haven’t done in a long time: build a better America. \\n\\nFor more than two years, COVID-19 has impacted every decision in our lives and the life of the nation. \\n\\nAnd I know you’re tired, frustrated, and exhausted. \\n\\nBut I also know this. \\n\\nBecause of the progress we’ve made, because of your resilience and the tools we have, tonight I can say \\nwe are moving forward safely, back to more normal routines. \\n\\nWe’ve reached a new moment in the fight against COVID-19, with severe cases down to a level not seen since last July. \\n\\nJust a few days ago, the Centers for Disease Control and Prevention—the CDC—issued new mask guidelines. \\n\\nUnder these new guidelines, most Americans in most of the country can now be mask free. \\n\\nAnd based on the projections, more of the country will reach that point across the next couple of weeks. \\n\\nThanks to the progress we have made this past year, COVID-19 need no longer control our lives. \\n\\nI know some are talking about “living with COVID-19”. Tonight – I say that we will never just accept living with COVID-19. \\n\\nWe will continue to combat the virus as we do other diseases. And because this is a virus that mutates and spreads, we will stay on guard. \\n\\nHere are four common sense steps as we move forward safely. \\n\\nFirst, stay protected with vaccines and treatments. We know how incredibly effective vaccines are. If you’re vaccinated and boosted you have the highest degree of protection. \\n\\nWe will never give up on vaccinating more Americans. Now, I know parents with kids under 5 are eager to see a vaccine authorized for their children. \\n\\nThe scientists are working hard to get that done and we’ll be ready with plenty of vaccines when they do. \\n\\nWe’re also ready with anti-viral treatments. If you get COVID-19, the Pfizer pill reduces your chances of ending up in the hospital by 90%. \\n\\nWe’ve ordered more of these pills than anyone in the world. 
And Pfizer is working overtime to get us 1 Million pills this month and more than double that next month. \\n\\nAnd we’re launching the “Test to Treat” initiative so people can get tested at a pharmacy, and if they’re positive, receive antiviral pills on the spot at no cost. \\n\\nIf you’re immunocompromised or have some other vulnerability, we have treatments and free high-quality masks. \\n\\nWe’re leaving no one behind or ignoring anyone’s needs as we move forward. \\n\\nAnd on testing, we have made hundreds of millions of tests available for you to order for free. \\n\\nEven if you already ordered free tests tonight, I am announcing that you can order more from covidtests.gov starting next week. \\n\\nSecond – we must prepare for new variants. Over the past year, we’ve gotten much better at detecting new variants. \\n\\nIf necessary, we’ll be able to deploy new vaccines within 100 days instead of many more months or years. \\n\\nAnd, if Congress provides the funds we need, we’ll have new stockpiles of tests, masks, and pills ready if needed. \\n\\nI cannot promise a new variant won’t come. But I can promise you we’ll do everything within our power to be ready if it does. \\n\\nThird – we can end the shutdown of schools and businesses. We have the tools we need. \\n\\nIt’s time for Americans to get back to work and fill our great downtowns again. People working from home can feel safe to begin to return to the office. \\n\\nWe’re doing that here in the federal government. The vast majority of federal workers will once again work in person. \\n\\nOur schools are open. Let’s keep it that way. Our kids need to be in school. \\n\\nAnd with 75% of adult Americans fully vaccinated and hospitalizations down by 77%, most Americans can remove their masks, return to work, stay in the classroom, and move forward safely. \\n\\nWe achieved this because we provided free vaccines, treatments, tests, and masks. \\n\\nOf course, continuing this costs money. \\n\\nI will soon send Congress a request. \\n\\nThe vast majority of Americans have used these tools and may want to again, so I expect Congress to pass it quickly. \\n\\nFourth, we will continue vaccinating the world. \\n\\nWe’ve sent 475 Million vaccine doses to 112 countries, more than any other nation. \\n\\nAnd we won’t stop. \\n\\nWe have lost so much to COVID-19. Time with one another. And worst of all, so much loss of life. \\n\\nLet’s use this moment to reset. Let’s stop looking at COVID-19 as a partisan dividing line and see it for what it is: A God-awful disease. \\n\\nLet’s stop seeing each other as enemies, and start seeing each other for who we really are: Fellow Americans. \\n\\nWe can’t change how divided we’ve been. But we can change how we move forward—on COVID-19 and other issues we must face together. \\n\\nI recently visited the New York City Police Department days after the funerals of Officer Wilbert Mora and his partner, Officer Jason Rivera. \\n\\nThey were responding to a 9-1-1 call when a man shot and killed them with a stolen gun. \\n\\nOfficer Mora was 27 years old. \\n\\nOfficer Rivera was 22. \\n\\nBoth Dominican Americans who’d grown up on the same streets they later chose to patrol as police officers. \\n\\nI spoke with their families and told them that we are forever in debt for their sacrifice, and we will carry on their mission to restore the trust and safety every community deserves. \\n\\nI’ve worked on these issues a long time. 
\\n\\nI know what works: Investing in crime preventionand community police officers who’ll walk the beat, who’ll know the neighborhood, and who can restore trust and safety. \\n\\nSo let’s not abandon our streets. Or choose between safety and equal justice. \\n\\nLet’s come together to protect our communities, restore trust, and hold law enforcement accountable. \\n\\nThat’s why the Justice Department required body cameras, banned chokeholds, and restricted no-knock warrants for its officers. \\n\\nThat’s why the American Rescue Plan provided $350 Billion that cities, states, and counties can use to hire more police and invest in proven strategies like community violence interruption—trusted messengers breaking the cycle of violence and trauma and giving young people hope. \\n\\nWe should all agree: The answer is not to Defund the police. The answer is to FUND the police with the resources and training they need to protect our communities. \\n\\nI ask Democrats and Republicans alike: Pass my budget and keep our neighborhoods safe. \\n\\nAnd I will keep doing everything in my power to crack down on gun trafficking and ghost guns you can buy online and make at home—they have no serial numbers and can’t be traced. \\n\\nAnd I ask Congress to pass proven measures to reduce gun violence. Pass universal background checks. Why should anyone on a terrorist list be able to purchase a weapon? \\n\\nBan assault weapons and high-capacity magazines. \\n\\nRepeal the liability shield that makes gun manufacturers the only industry in America that can’t be sued. \\n\\nThese laws don’t infringe on the Second Amendment. They save lives. \\n\\nThe most fundamental right in America is the right to vote – and to have it counted. And it’s under assault. \\n\\nIn state after state, new laws have been passed, not only to suppress the vote, but to subvert entire elections. \\n\\nWe cannot let this happen. \\n\\nTonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \\n\\nTonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \\n\\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \\n\\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence. \\n\\nA former top litigator in private practice. A former federal public defender. And from a family of public school educators and police officers. A consensus builder. Since she’s been nominated, she’s received a broad range of support—from the Fraternal Order of Police to former judges appointed by Democrats and Republicans. \\n\\nAnd if we are to advance liberty and justice, we need to secure the Border and fix the immigration system. \\n\\nWe can do both. At our border, we’ve installed new technology like cutting-edge scanners to better detect drug smuggling. \\n\\nWe’ve set up joint patrols with Mexico and Guatemala to catch more human traffickers. \\n\\nWe’re putting in place dedicated immigration judges so families fleeing persecution and violence can have their cases heard faster. 
\\n\\nWe’re securing commitments and supporting partners in South and Central America to host more refugees and secure their own borders. \\n\\nWe can do all this while keeping lit the torch of liberty that has led generations of immigrants to this land—my forefathers and so many of yours. \\n\\nProvide a pathway to citizenship for Dreamers, those on temporary status, farm workers, and essential workers. \\n\\nRevise our laws so businesses have the workers they need and families don’t wait decades to reunite. \\n\\nIt’s not only the right thing to do—it’s the economically smart thing to do. \\n\\nThat’s why immigration reform is supported by everyone from labor unions to religious leaders to the U.S. Chamber of Commerce. \\n\\nLet’s get it done once and for all. \\n\\nAdvancing liberty and justice also requires protecting the rights of women. \\n\\nThe constitutional right affirmed in Roe v. Wade—standing precedent for half a century—is under attack as never before. \\n\\nIf we want to go forward—not backward—we must protect access to health care. Preserve a woman’s right to choose. And let’s continue to advance maternal health care in America. \\n\\nAnd for our LGBTQ+ Americans, let’s finally get the bipartisan Equality Act to my desk. The onslaught of state laws targeting transgender Americans and their families is wrong. \\n\\nAs I said last year, especially to our younger transgender Americans, I will always have your back as your President, so you can be yourself and reach your God-given potential. \\n\\nWhile it often appears that we never agree, that isn’t true. I signed 80 bipartisan bills into law last year. From preventing government shutdowns to protecting Asian-Americans from still-too-common hate crimes to reforming military justice. \\n\\nAnd soon, we’ll strengthen the Violence Against Women Act that I first wrote three decades ago. It is important for us to show the nation that we can come together and do big things. \\n\\nSo tonight I’m offering a Unity Agenda for the Nation. Four big things we can do together. \\n\\nFirst, beat the opioid epidemic. \\n\\nThere is so much we can do. Increase funding for prevention, treatment, harm reduction, and recovery. \\n\\nGet rid of outdated rules that stop doctors from prescribing treatments. And stop the flow of illicit drugs by working with state and local law enforcement to go after traffickers. \\n\\nIf you’re suffering from addiction, know you are not alone. I believe in recovery, and I celebrate the 23 million Americans in recovery. \\n\\nSecond, let’s take on mental health. Especially among our children, whose lives and education have been turned upside down. \\n\\nThe American Rescue Plan gave schools money to hire teachers and help students make up for lost learning. \\n\\nI urge every parent to make sure your school does just that. And we can all play a part—sign up to be a tutor or a mentor. \\n\\nChildren were also struggling before the pandemic. Bullying, violence, trauma, and the harms of social media. \\n\\nAs Frances Haugen, who is here with us tonight, has shown, we must hold social media platforms accountable for the national experiment they’re conducting on our children for profit. \\n\\nIt’s time to strengthen privacy protections, ban targeted advertising to children, demand tech companies stop collecting personal data on our children. \\n\\nAnd let’s get all Americans the mental health services they need. More people they can turn to for help, and full parity between physical and mental health care. 
\\n\\nThird, support our veterans. \\n\\nVeterans are the best of us. \\n\\nI’ve always believed that we have a sacred obligation to equip all those we send to war and care for them and their families when they come home. \\n\\nMy administration is providing assistance with job training and housing, and now helping lower-income veterans get VA care debt-free. \\n\\nOur troops in Iraq and Afghanistan faced many dangers. \\n\\nOne was stationed at bases and breathing in toxic smoke from “burn pits” that incinerated wastes of war—medical and hazard material, jet fuel, and more. \\n\\nWhen they came home, many of the world’s fittest and best trained warriors were never the same. \\n\\nHeadaches. Numbness. Dizziness. \\n\\nA cancer that would put them in a flag-draped coffin. \\n\\nI know. \\n\\nOne of those soldiers was my son Major Beau Biden. \\n\\nWe don’t know for sure if a burn pit was the cause of his brain cancer, or the diseases of so many of our troops. \\n\\nBut I’m committed to finding out everything we can. \\n\\nCommitted to military families like Danielle Robinson from Ohio. \\n\\nThe widow of Sergeant First Class Heath Robinson. \\n\\nHe was born a soldier. Army National Guard. Combat medic in Kosovo and Iraq. \\n\\nStationed near Baghdad, just yards from burn pits the size of football fields. \\n\\nHeath’s widow Danielle is here with us tonight. They loved going to Ohio State football games. He loved building Legos with their daughter. \\n\\nBut cancer from prolonged exposure to burn pits ravaged Heath’s lungs and body. \\n\\nDanielle says Heath was a fighter to the very end. \\n\\nHe didn’t know how to stop fighting, and neither did she. \\n\\nThrough her pain she found purpose to demand we do better. \\n\\nTonight, Danielle—we are. \\n\\nThe VA is pioneering new ways of linking toxic exposures to diseases, already helping more veterans get benefits. \\n\\nAnd tonight, I’m announcing we’re expanding eligibility to veterans suffering from nine respiratory cancers. \\n\\nI’m also calling on Congress: pass a law to make sure veterans devastated by toxic exposures in Iraq and Afghanistan finally get the benefits and comprehensive health care they deserve. \\n\\nAnd fourth, let’s end cancer as we know it. \\n\\nThis is personal to me and Jill, to Kamala, and to so many of you. \\n\\nCancer is the #2 cause of death in America–second only to heart disease. \\n\\nLast month, I announced our plan to supercharge \\nthe Cancer Moonshot that President Obama asked me to lead six years ago. \\n\\nOur goal is to cut the cancer death rate by at least 50% over the next 25 years, turn more cancers from death sentences into treatable diseases. \\n\\nMore support for patients and families. \\n\\nTo get there, I call on Congress to fund ARPA-H, the Advanced Research Projects Agency for Health. \\n\\nIt’s based on DARPA—the Defense Department project that led to the Internet, GPS, and so much more. \\n\\nARPA-H will have a singular purpose—to drive breakthroughs in cancer, Alzheimer’s, diabetes, and more. \\n\\nA unity agenda for the nation. \\n\\nWe can do this. \\n\\nMy fellow Americans—tonight , we have gathered in a sacred space—the citadel of our democracy. \\n\\nIn this Capitol, generation after generation, Americans have debated great questions amid great strife, and have done great things. \\n\\nWe have fought for freedom, expanded liberty, defeated totalitarianism and terror. \\n\\nAnd built the strongest, freest, and most prosperous nation the world has ever known. \\n\\nNow is the hour. 
\\n\\nOur moment of responsibility. \\n\\nOur test of resolve and conscience, of history itself. \\n\\nIt is in this moment that our character is formed. Our purpose is found. Our future is forged. \\n\\nWell I know this nation. \\n\\nWe will meet the test. \\n\\nTo protect freedom and liberty, to expand fairness and opportunity. \\n\\nWe will save democracy. \\n\\nAs hard as these times have been, I am more optimistic about America today than I have been my whole life. \\n\\nBecause I see the future that is within our grasp. \\n\\nBecause I know there is simply nothing beyond our capacity. \\n\\nWe are the only nation on Earth that has always turned every crisis we have faced into an opportunity. \\n\\nThe only nation that can be defined by a single word: possibilities. \\n\\nSo on this night, in our 245th year as a nation, I have come to report on the State of the Union. \\n\\nAnd my report is this: the State of the Union is strong—because you, the American people, are strong. \\n\\nWe are stronger today than we were a year ago. \\n\\nAnd we will be stronger a year from now than we are today. \\n\\nNow is our moment to meet and overcome the challenges of our time. \\n\\nAnd we will, as one people. \\n\\nOne America. \\n\\nThe United States of America. \\n\\nMay God bless you all. May God protect our troops.', metadata={'source': './state_of_the_union.txt'})]" + ] + }, + "metadata": {}, + "execution_count": 97 + } + ] + }, + { + "cell_type": "code", + "source": [ + "import textwrap\n", + "\n", + "def wrap_text_preserve_newlines(text, width=110):\n", + " # Split the input text into lines based on newline characters\n", + " lines = text.split('\\n')\n", + "\n", + " # Wrap each line individually\n", + " wrapped_lines = [textwrap.fill(line, width=width) for line in lines]\n", + "\n", + " # Join the wrapped lines back together using newline characters\n", + " wrapped_text = '\\n'.join(wrapped_lines)\n", + "\n", + " return wrapped_text" + ], + "metadata": { + "id": "rZjjKyi1m_lA" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "print(wrap_text_preserve_newlines(str(documents[0])))" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "mV04kCtEm_tF", + "outputId": "b8a72af2-55e1-4ed0-bd88-63b8d39f1279" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "page_content='Madam Speaker, Madam Vice President, our First Lady and Second Gentleman. Members of Congress\n", + "and the Cabinet. Justices of the Supreme Court. My fellow Americans. \\n\\nLast year COVID-19 kept us apart.\n", + "This year we are finally together again. \\n\\nTonight, we meet as Democrats Republicans and Independents. But\n", + "most importantly as Americans. \\n\\nWith a duty to one another to the American people to the Constitution.\n", + "\\n\\nAnd with an unwavering resolve that freedom will always triumph over tyranny. \\n\\nSix days ago, Russia’s\n", + "Vladimir Putin sought to shake the foundations of the free world thinking he could make it bend to his\n", + "menacing ways. But he badly miscalculated. \\n\\nHe thought he could roll into Ukraine and the world would roll\n", + "over. Instead he met a wall of strength he never imagined. \\n\\nHe met the Ukrainian people. \\n\\nFrom President\n", + "Zelenskyy to every Ukrainian, their fearlessness, their courage, their determination, inspires the world.\n", + "\\n\\nGroups of citizens blocking tanks with their bodies. 
Everyone from students to retirees teachers turned\n", + "soldiers defending their homeland. \\n\\nIn this struggle as President Zelenskyy said in his speech to the\n", + "European Parliament “Light will win over darkness.” The Ukrainian Ambassador to the United States is here\n", + "tonight. \\n\\nLet each of us here tonight in this Chamber send an unmistakable signal to Ukraine and to the\n", + "world. \\n\\nPlease rise if you are able and show that, Yes, we the United States of America stand with the\n", + "Ukrainian people. \\n\\nThroughout our history we’ve learned this lesson when dictators do not pay a price for\n", + "their aggression they cause more chaos. \\n\\nThey keep moving. \\n\\nAnd the costs and the threats to America\n", + "and the world keep rising. \\n\\nThat’s why the NATO Alliance was created to secure peace and stability in\n", + "Europe after World War 2. \\n\\nThe United States is a member along with 29 other nations. \\n\\nIt matters.\n", + "American diplomacy matters. American resolve matters. \\n\\nPutin’s latest attack on Ukraine was premeditated\n", + "and unprovoked. \\n\\nHe rejected repeated efforts at diplomacy. \\n\\nHe thought the West and NATO wouldn’t\n", + "respond. And he thought he could divide us at home. Putin was wrong. We were ready. Here is what we did.\n", + "\\n\\nWe prepared extensively and carefully. \\n\\nWe spent months building a coalition of other freedom-loving\n", + "nations from Europe and the Americas to Asia and Africa to confront Putin. \\n\\nI spent countless hours\n", + "unifying our European allies. We shared with the world in advance what we knew Putin was planning and\n", + "precisely how he would try to falsely justify his aggression. \\n\\nWe countered Russia’s lies with truth.\n", + "\\n\\nAnd now that he has acted the free world is holding him accountable. \\n\\nAlong with twenty-seven members\n", + "of the European Union including France, Germany, Italy, as well as countries like the United Kingdom, Canada,\n", + "Japan, Korea, Australia, New Zealand, and many others, even Switzerland. \\n\\nWe are inflicting pain on Russia\n", + "and supporting the people of Ukraine. Putin is now isolated from the world more than ever. \\n\\nTogether with\n", + "our allies –we are right now enforcing powerful economic sanctions. \\n\\nWe are cutting off Russia’s largest\n", + "banks from the international financial system. \\n\\nPreventing Russia’s central bank from defending the\n", + "Russian Ruble making Putin’s $630 Billion “war fund” worthless. \\n\\nWe are choking off Russia’s access to\n", + "technology that will sap its economic strength and weaken its military for years to come. \\n\\nTonight I say\n", + "to the Russian oligarchs and corrupt leaders who have bilked billions of dollars off this violent regime no\n", + "more. \\n\\nThe U.S. Department of Justice is assembling a dedicated task force to go after the crimes of\n", + "Russian oligarchs. \\n\\nWe are joining with our European allies to find and seize your yachts your luxury\n", + "apartments your private jets. We are coming for your ill-begotten gains. \\n\\nAnd tonight I am announcing that\n", + "we will join our allies in closing off American air space to all Russian flights – further isolating Russia –\n", + "and adding an additional squeeze –on their economy. The Ruble has lost 30% of its value. \\n\\nThe Russian stock\n", + "market has lost 40% of its value and trading remains suspended. Russia’s economy is reeling and Putin alone is\n", + "to blame. 
\\n\\nTogether with our allies we are providing support to the Ukrainians in their fight for freedom.\n", + "Military assistance. Economic assistance. Humanitarian assistance. \\n\\nWe are giving more than $1 Billion in\n", + "direct assistance to Ukraine. \\n\\nAnd we will continue to aid the Ukrainian people as they defend their\n", + "country and to help ease their suffering. \\n\\nLet me be clear, our forces are not engaged and will not engage\n", + "in conflict with Russian forces in Ukraine. \\n\\nOur forces are not going to Europe to fight in Ukraine, but\n", + "to defend our NATO Allies – in the event that Putin decides to keep moving west. \\n\\nFor that purpose we’ve\n", + "mobilized American ground forces, air squadrons, and ship deployments to protect NATO countries including\n", + "Poland, Romania, Latvia, Lithuania, and Estonia. \\n\\nAs I have made crystal clear the United States and our\n", + "Allies will defend every inch of territory of NATO countries with the full force of our collective power.\n", + "\\n\\nAnd we remain clear-eyed. The Ukrainians are fighting back with pure courage. But the next few days weeks,\n", + "months, will be hard on them. \\n\\nPutin has unleashed violence and chaos. But while he may make gains on the\n", + "battlefield – he will pay a continuing high price over the long run. \\n\\nAnd a proud Ukrainian people, who\n", + "have known 30 years of independence, have repeatedly shown that they will not tolerate anyone who tries to\n", + "take their country backwards. \\n\\nTo all Americans, I will be honest with you, as I’ve always promised. A\n", + "Russian dictator, invading a foreign country, has costs around the world. \\n\\nAnd I’m taking robust action to\n", + "make sure the pain of our sanctions is targeted at Russia’s economy. And I will use every tool at our\n", + "disposal to protect American businesses and consumers. \\n\\nTonight, I can announce that the United States has\n", + "worked with 30 other countries to release 60 Million barrels of oil from reserves around the world.\n", + "\\n\\nAmerica will lead that effort, releasing 30 Million barrels from our own Strategic Petroleum Reserve. And\n", + "we stand ready to do more if necessary, unified with our allies. \\n\\nThese steps will help blunt gas prices\n", + "here at home. And I know the news about what’s happening can seem alarming. \\n\\nBut I want you to know that we\n", + "are going to be okay. \\n\\nWhen the history of this era is written Putin’s war on Ukraine will have left Russia\n", + "weaker and the rest of the world stronger. \\n\\nWhile it shouldn’t have taken something so terrible for people\n", + "around the world to see what’s at stake now everyone sees it clearly. \\n\\nWe see the unity among leaders of\n", + "nations and a more unified Europe a more unified West. And we see unity among the people who are gathering in\n", + "cities in large crowds around the world even in Russia to demonstrate their support for Ukraine. \\n\\nIn the\n", + "battle between democracy and autocracy, democracies are rising to the moment, and the world is clearly\n", + "choosing the side of peace and security. \\n\\nThis is a real test. It’s going to take time. So let us continue\n", + "to draw inspiration from the iron will of the Ukrainian people. \\n\\nTo our fellow Ukrainian Americans who\n", + "forge a deep bond that connects our two nations we stand with you. \\n\\nPutin may circle Kyiv with tanks, but\n", + "he will never gain the hearts and souls of the Ukrainian people. 
\\n\\nHe will never extinguish their love of\n", + "freedom. He will never weaken the resolve of the free world. \\n\\nWe meet tonight in an America that has lived\n", + "through two of the hardest years this nation has ever faced. \\n\\nThe pandemic has been punishing. \\n\\nAnd so\n", + "many families are living paycheck to paycheck, struggling to keep up with the rising cost of food, gas,\n", + "housing, and so much more. \\n\\nI understand. \\n\\nI remember when my Dad had to leave our home in Scranton,\n", + "Pennsylvania to find work. I grew up in a family where if the price of food went up, you felt it. \\n\\nThat’s\n", + "why one of the first things I did as President was fight to pass the American Rescue Plan. \\n\\nBecause people\n", + "were hurting. We needed to act, and we did. \\n\\nFew pieces of legislation have done more in a critical moment\n", + "in our history to lift us out of crisis. \\n\\nIt fueled our efforts to vaccinate the nation and combat\n", + "COVID-19. It delivered immediate economic relief for tens of millions of Americans. \\n\\nHelped put food on\n", + "their table, keep a roof over their heads, and cut the cost of health insurance. \\n\\nAnd as my Dad used to\n", + "say, it gave people a little breathing room. \\n\\nAnd unlike the $2 Trillion tax cut passed in the previous\n", + "administration that benefitted the top 1% of Americans, the American Rescue Plan helped working people—and\n", + "left no one behind. \\n\\nAnd it worked. It created jobs. Lots of jobs. \\n\\nIn fact—our economy created over 6.5\n", + "Million new jobs just last year, more jobs created in one year \\nthan ever before in the history of America.\n", + "\\n\\nOur economy grew at a rate of 5.7% last year, the strongest growth in nearly 40 years, the first step in\n", + "bringing fundamental change to an economy that hasn’t worked for the working people of this nation for too\n", + "long. \\n\\nFor the past 40 years we were told that if we gave tax breaks to those at the very top, the\n", + "benefits would trickle down to everyone else. \\n\\nBut that trickle-down theory led to weaker economic growth,\n", + "lower wages, bigger deficits, and the widest gap between those at the top and everyone else in nearly a\n", + "century. \\n\\nVice President Harris and I ran for office with a new economic vision for America. \\n\\nInvest in\n", + "America. Educate Americans. Grow the workforce. Build the economy from the bottom up \\nand the middle out,\n", + "not from the top down. \\n\\nBecause we know that when the middle class grows, the poor have a ladder up and\n", + "the wealthy do very well. \\n\\nAmerica used to have the best roads, bridges, and airports on Earth. \\n\\nNow our\n", + "infrastructure is ranked 13th in the world. \\n\\nWe won’t be able to compete for the jobs of the 21st Century\n", + "if we don’t fix that. \\n\\nThat’s why it was so important to pass the Bipartisan Infrastructure Law—the most\n", + "sweeping investment to rebuild America in history. \\n\\nThis was a bipartisan effort, and I want to thank the\n", + "members of both parties who worked to make it happen. \\n\\nWe’re done talking about infrastructure weeks.\n", + "\\n\\nWe’re going to have an infrastructure decade. \\n\\nIt is going to transform America and put us on a path to\n", + "win the economic competition of the 21st Century that we face with the rest of the world—particularly with\n", + "China. \\n\\nAs I’ve told Xi Jinping, it is never a good bet to bet against the American people. 
\\n\\nWe’ll\n", + "create good jobs for millions of Americans, modernizing roads, airports, ports, and waterways all across\n", + "America. \\n\\nAnd we’ll do it all to withstand the devastating effects of the climate crisis and promote\n", + "environmental justice. \\n\\nWe’ll build a national network of 500,000 electric vehicle charging stations, begin\n", + "to replace poisonous lead pipes—so every child—and every American—has clean water to drink at home and at\n", + "school, provide affordable high-speed internet for every American—urban, suburban, rural, and tribal\n", + "communities. \\n\\n4,000 projects have already been announced. \\n\\nAnd tonight, I’m announcing that this year we\n", + "will start fixing over 65,000 miles of highway and 1,500 bridges in disrepair. \\n\\nWhen we use taxpayer\n", + "dollars to rebuild America – we are going to Buy American: buy American products to support American jobs.\n", + "\\n\\nThe federal government spends about $600 Billion a year to keep the country safe and secure. \\n\\nThere’s\n", + "been a law on the books for almost a century \\nto make sure taxpayers’ dollars support American jobs and\n", + "businesses. \\n\\nEvery Administration says they’ll do it, but we are actually doing it. \\n\\nWe will buy\n", + "American to make sure everything from the deck of an aircraft carrier to the steel on highway guardrails are\n", + "made in America. \\n\\nBut to compete for the best jobs of the future, we also need to level the playing field\n", + "with China and other competitors. \\n\\nThat’s why it is so important to pass the Bipartisan Innovation Act\n", + "sitting in Congress that will make record investments in emerging technologies and American manufacturing.\n", + "\\n\\nLet me give you one example of why it’s so important to pass it. \\n\\nIf you travel 20 miles east of\n", + "Columbus, Ohio, you’ll find 1,000 empty acres of land. \\n\\nIt won’t look like much, but if you stop and look\n", + "closely, you’ll see a “Field of dreams,” the ground on which America’s future will be built. \\n\\nThis is where\n", + "Intel, the American company that helped build Silicon Valley, is going to build its $20 billion semiconductor\n", + "“mega site”. \\n\\nUp to eight state-of-the-art factories in one place. 10,000 new good-paying jobs. \\n\\nSome of\n", + "the most sophisticated manufacturing in the world to make computer chips the size of a fingertip that power\n", + "the world and our everyday lives. \\n\\nSmartphones. The Internet. Technology we have yet to invent. \\n\\nBut\n", + "that’s just the beginning. \\n\\nIntel’s CEO, Pat Gelsinger, who is here tonight, told me they are ready to\n", + "increase their investment from \\n$20 billion to $100 billion. \\n\\nThat would be one of the biggest\n", + "investments in manufacturing in American history. \\n\\nAnd all they’re waiting for is for you to pass this\n", + "bill. \\n\\nSo let’s not wait any longer. Send it to my desk. I’ll sign it. \\n\\nAnd we will really take off.\n", + "\\n\\nAnd Intel is not alone. \\n\\nThere’s something happening in America. \\n\\nJust look around and you’ll see an\n", + "amazing story. \\n\\nThe rebirth of the pride that comes from stamping products “Made In America.” The\n", + "revitalization of American manufacturing. \\n\\nCompanies are choosing to build new factories here, when just\n", + "a few years ago, they would have built them overseas. \\n\\nThat’s what is happening. 
Ford is investing $11\n", + "billion to build electric vehicles, creating 11,000 jobs across the country. \\n\\nGM is making the largest\n", + "investment in its history—$7 billion to build electric vehicles, creating 4,000 jobs in Michigan. \\n\\nAll\n", + "told, we created 369,000 new manufacturing jobs in America just last year. \\n\\nPowered by people I’ve met like\n", + "JoJo Burgess, from generations of union steelworkers from Pittsburgh, who’s here with us tonight. \\n\\nAs Ohio\n", + "Senator Sherrod Brown says, “It’s time to bury the label “Rust Belt.” \\n\\nIt’s time. \\n\\nBut with all the\n", + "bright spots in our economy, record job growth and higher wages, too many families are struggling to keep up\n", + "with the bills. \\n\\nInflation is robbing them of the gains they might otherwise feel. \\n\\nI get it. That’s\n", + "why my top priority is getting prices under control. \\n\\nLook, our economy roared back faster than most\n", + "predicted, but the pandemic meant that businesses had a hard time hiring enough workers to keep up production\n", + "in their factories. \\n\\nThe pandemic also disrupted global supply chains. \\n\\nWhen factories close, it takes\n", + "longer to make goods and get them from the warehouse to the store, and prices go up. \\n\\nLook at cars.\n", + "\\n\\nLast year, there weren’t enough semiconductors to make all the cars that people wanted to buy. \\n\\nAnd\n", + "guess what, prices of automobiles went up. \\n\\nSo—we have a choice. \\n\\nOne way to fight inflation is to drive\n", + "down wages and make Americans poorer. \\n\\nI have a better plan to fight inflation. \\n\\nLower your costs, not\n", + "your wages. \\n\\nMake more cars and semiconductors in America. \\n\\nMore infrastructure and innovation in\n", + "America. \\n\\nMore goods moving faster and cheaper in America. \\n\\nMore jobs where you can earn a good living\n", + "in America. \\n\\nAnd instead of relying on foreign supply chains, let’s make it in America. \\n\\nEconomists call\n", + "it “increasing the productive capacity of our economy.” \\n\\nI call it building a better America. \\n\\nMy plan\n", + "to fight inflation will lower your costs and lower the deficit. \\n\\n17 Nobel laureates in economics say my\n", + "plan will ease long-term inflationary pressures. Top business leaders and most Americans support my plan. And\n", + "here’s the plan: \\n\\nFirst – cut the cost of prescription drugs. Just look at insulin. One in ten Americans\n", + "has diabetes. In Virginia, I met a 13-year-old boy named Joshua Davis. \\n\\nHe and his Dad both have Type 1\n", + "diabetes, which means they need insulin every day. Insulin costs about $10 a vial to make. \\n\\nBut drug\n", + "companies charge families like Joshua and his Dad up to 30 times more. I spoke with Joshua’s mom. \\n\\nImagine\n", + "what it’s like to look at your child who needs insulin and have no idea how you’re going to pay for it.\n", + "\\n\\nWhat it does to your dignity, your ability to look your child in the eye, to be the parent you expect to\n", + "be. \\n\\nJoshua is here with us tonight. Yesterday was his birthday. Happy birthday, buddy. \\n\\nFor Joshua,\n", + "and for the 200,000 other young people with Type 1 diabetes, let’s cap the cost of insulin at $35 a month so\n", + "everyone can afford it. \\n\\nDrug companies will still do very well. And while we’re at it let Medicare\n", + "negotiate lower prices for prescription drugs, like the VA already does. 
\\n\\nLook, the American Rescue Plan is\n", + "helping millions of families on Affordable Care Act plans save $2,400 a year on their health care premiums.\n", + "Let’s close the coverage gap and make those savings permanent. \\n\\nSecond – cut energy costs for families an\n", + "average of $500 a year by combatting climate change. \\n\\nLet’s provide investments and tax credits to\n", + "weatherize your homes and businesses to be energy efficient and you get a tax credit; double America’s clean\n", + "energy production in solar, wind, and so much more; lower the price of electric vehicles, saving you another\n", + "$80 a month because you’ll never have to pay at the gas pump again. \\n\\nThird – cut the cost of child care.\n", + "Many families pay up to $14,000 a year for child care per child. \\n\\nMiddle-class and working families\n", + "shouldn’t have to pay more than 7% of their income for care of young children. \\n\\nMy plan will cut the cost\n", + "in half for most families and help parents, including millions of women, who left the workforce during the\n", + "pandemic because they couldn’t afford child care, to be able to get back to work. \\n\\nMy plan doesn’t stop\n", + "there. It also includes home and long-term care. More affordable housing. And Pre-K for every 3- and 4-year-\n", + "old. \\n\\nAll of these will lower costs. \\n\\nAnd under my plan, nobody earning less than $400,000 a year will\n", + "pay an additional penny in new taxes. Nobody. \\n\\nThe one thing all Americans agree on is that the tax system\n", + "is not fair. We have to fix it. \\n\\nI’m not looking to punish anyone. But let’s make sure corporations and\n", + "the wealthiest Americans start paying their fair share. \\n\\nJust last year, 55 Fortune 500 corporations earned\n", + "$40 billion in profits and paid zero dollars in federal income tax. \\n\\nThat’s simply not fair. That’s why\n", + "I’ve proposed a 15% minimum tax rate for corporations. \\n\\nWe got more than 130 countries to agree on a global\n", + "minimum tax rate so companies can’t get out of paying their taxes at home by shipping jobs and factories\n", + "overseas. \\n\\nThat’s why I’ve proposed closing loopholes so the very wealthy don’t pay a lower tax rate than a\n", + "teacher or a firefighter. \\n\\nSo that’s my plan. It will grow the economy and lower costs for families.\n", + "\\n\\nSo what are we waiting for? Let’s get this done. And while you’re at it, confirm my nominees to the\n", + "Federal Reserve, which plays a critical role in fighting inflation. \\n\\nMy plan will not only lower costs to\n", + "give families a fair shot, it will lower the deficit. \\n\\nThe previous Administration not only ballooned the\n", + "deficit with tax cuts for the very wealthy and corporations, it undermined the watchdogs whose job was to keep\n", + "pandemic relief funds from being wasted. \\n\\nBut in my administration, the watchdogs have been welcomed back.\n", + "\\n\\nWe’re going after the criminals who stole billions in relief money meant for small businesses and millions\n", + "of Americans. \\n\\nAnd tonight, I’m announcing that the Justice Department will name a chief prosecutor for\n", + "pandemic fraud. \\n\\nBy the end of this year, the deficit will be down to less than half what it was before I\n", + "took office. \\n\\nThe only president ever to cut the deficit by more than one trillion dollars in a single\n", + "year. \\n\\nLowering your costs also means demanding more competition. 
\\n\\nI’m a capitalist, but capitalism\n", + "without competition isn’t capitalism. \\n\\nIt’s exploitation—and it drives up prices. \\n\\nWhen corporations\n", + "don’t have to compete, their profits go up, your prices go up, and small businesses and family farmers and\n", + "ranchers go under. \\n\\nWe see it happening with ocean carriers moving goods in and out of America. \\n\\nDuring\n", + "the pandemic, these foreign-owned companies raised prices by as much as 1,000% and made record profits.\n", + "\\n\\nTonight, I’m announcing a crackdown on these companies overcharging American businesses and consumers.\n", + "\\n\\nAnd as Wall Street firms take over more nursing homes, quality in those homes has gone down and costs have\n", + "gone up. \\n\\nThat ends on my watch. \\n\\nMedicare is going to set higher standards for nursing homes and make\n", + "sure your loved ones get the care they deserve and expect. \\n\\nWe’ll also cut costs and keep the economy going\n", + "strong by giving workers a fair shot, provide more training and apprenticeships, hire them based on their\n", + "skills not degrees. \\n\\nLet’s pass the Paycheck Fairness Act and paid leave. \\n\\nRaise the minimum wage to\n", + "$15 an hour and extend the Child Tax Credit, so no one has to raise a family in poverty. \\n\\nLet’s increase\n", + "Pell Grants and increase our historic support of HBCUs, and invest in what Jill—our First Lady who teaches\n", + "full-time—calls America’s best-kept secret: community colleges. \\n\\nAnd let’s pass the PRO Act when a majority\n", + "of workers want to form a union—they shouldn’t be stopped. \\n\\nWhen we invest in our workers, when we build\n", + "the economy from the bottom up and the middle out together, we can do something we haven’t done in a long\n", + "time: build a better America. \\n\\nFor more than two years, COVID-19 has impacted every decision in our lives\n", + "and the life of the nation. \\n\\nAnd I know you’re tired, frustrated, and exhausted. \\n\\nBut I also know this.\n", + "\\n\\nBecause of the progress we’ve made, because of your resilience and the tools we have, tonight I can say\n", + "\\nwe are moving forward safely, back to more normal routines. \\n\\nWe’ve reached a new moment in the fight\n", + "against COVID-19, with severe cases down to a level not seen since last July. \\n\\nJust a few days ago, the\n", + "Centers for Disease Control and Prevention—the CDC—issued new mask guidelines. \\n\\nUnder these new guidelines,\n", + "most Americans in most of the country can now be mask free. \\n\\nAnd based on the projections, more of the\n", + "country will reach that point across the next couple of weeks. \\n\\nThanks to the progress we have made this\n", + "past year, COVID-19 need no longer control our lives. \\n\\nI know some are talking about “living with\n", + "COVID-19”. Tonight – I say that we will never just accept living with COVID-19. \\n\\nWe will continue to combat\n", + "the virus as we do other diseases. And because this is a virus that mutates and spreads, we will stay on\n", + "guard. \\n\\nHere are four common sense steps as we move forward safely. \\n\\nFirst, stay protected with\n", + "vaccines and treatments. We know how incredibly effective vaccines are. If you’re vaccinated and boosted you\n", + "have the highest degree of protection. \\n\\nWe will never give up on vaccinating more Americans. Now, I know\n", + "parents with kids under 5 are eager to see a vaccine authorized for their children. 
\\n\\nThe scientists are\n", + "working hard to get that done and we’ll be ready with plenty of vaccines when they do. \\n\\nWe’re also ready\n", + "with anti-viral treatments. If you get COVID-19, the Pfizer pill reduces your chances of ending up in the\n", + "hospital by 90%. \\n\\nWe’ve ordered more of these pills than anyone in the world. And Pfizer is working\n", + "overtime to get us 1 Million pills this month and more than double that next month. \\n\\nAnd we’re launching\n", + "the “Test to Treat” initiative so people can get tested at a pharmacy, and if they’re positive, receive\n", + "antiviral pills on the spot at no cost. \\n\\nIf you’re immunocompromised or have some other vulnerability, we\n", + "have treatments and free high-quality masks. \\n\\nWe’re leaving no one behind or ignoring anyone’s needs as we\n", + "move forward. \\n\\nAnd on testing, we have made hundreds of millions of tests available for you to order for\n", + "free. \\n\\nEven if you already ordered free tests tonight, I am announcing that you can order more from\n", + "covidtests.gov starting next week. \\n\\nSecond – we must prepare for new variants. Over the past year, we’ve\n", + "gotten much better at detecting new variants. \\n\\nIf necessary, we’ll be able to deploy new vaccines within\n", + "100 days instead of many more months or years. \\n\\nAnd, if Congress provides the funds we need, we’ll have\n", + "new stockpiles of tests, masks, and pills ready if needed. \\n\\nI cannot promise a new variant won’t come. But\n", + "I can promise you we’ll do everything within our power to be ready if it does. \\n\\nThird – we can end the\n", + "shutdown of schools and businesses. We have the tools we need. \\n\\nIt’s time for Americans to get back to work\n", + "and fill our great downtowns again. People working from home can feel safe to begin to return to the office.\n", + "\\n\\nWe’re doing that here in the federal government. The vast majority of federal workers will once again work\n", + "in person. \\n\\nOur schools are open. Let’s keep it that way. Our kids need to be in school. \\n\\nAnd with 75%\n", + "of adult Americans fully vaccinated and hospitalizations down by 77%, most Americans can remove their masks,\n", + "return to work, stay in the classroom, and move forward safely. \\n\\nWe achieved this because we provided free\n", + "vaccines, treatments, tests, and masks. \\n\\nOf course, continuing this costs money. \\n\\nI will soon send\n", + "Congress a request. \\n\\nThe vast majority of Americans have used these tools and may want to again, so I\n", + "expect Congress to pass it quickly. \\n\\nFourth, we will continue vaccinating the world. \\n\\nWe’ve sent\n", + "475 Million vaccine doses to 112 countries, more than any other nation. \\n\\nAnd we won’t stop. \\n\\nWe have\n", + "lost so much to COVID-19. Time with one another. And worst of all, so much loss of life. \\n\\nLet’s use this\n", + "moment to reset. Let’s stop looking at COVID-19 as a partisan dividing line and see it for what it is: A God-\n", + "awful disease. \\n\\nLet’s stop seeing each other as enemies, and start seeing each other for who we really\n", + "are: Fellow Americans. \\n\\nWe can’t change how divided we’ve been. But we can change how we move forward—on\n", + "COVID-19 and other issues we must face together. \\n\\nI recently visited the New York City Police Department\n", + "days after the funerals of Officer Wilbert Mora and his partner, Officer Jason Rivera. 
\\n\\nThey were\n", + "responding to a 9-1-1 call when a man shot and killed them with a stolen gun. \\n\\nOfficer Mora was 27 years\n", + "old. \\n\\nOfficer Rivera was 22. \\n\\nBoth Dominican Americans who’d grown up on the same streets they later\n", + "chose to patrol as police officers. \\n\\nI spoke with their families and told them that we are forever in debt\n", + "for their sacrifice, and we will carry on their mission to restore the trust and safety every community\n", + "deserves. \\n\\nI’ve worked on these issues a long time. \\n\\nI know what works: Investing in crime preventionand\n", + "community police officers who’ll walk the beat, who’ll know the neighborhood, and who can restore trust and\n", + "safety. \\n\\nSo let’s not abandon our streets. Or choose between safety and equal justice. \\n\\nLet’s come\n", + "together to protect our communities, restore trust, and hold law enforcement accountable. \\n\\nThat’s why the\n", + "Justice Department required body cameras, banned chokeholds, and restricted no-knock warrants for its\n", + "officers. \\n\\nThat’s why the American Rescue Plan provided $350 Billion that cities, states, and counties can\n", + "use to hire more police and invest in proven strategies like community violence interruption—trusted\n", + "messengers breaking the cycle of violence and trauma and giving young people hope. \\n\\nWe should all agree:\n", + "The answer is not to Defund the police. The answer is to FUND the police with the resources and training they\n", + "need to protect our communities. \\n\\nI ask Democrats and Republicans alike: Pass my budget and keep our\n", + "neighborhoods safe. \\n\\nAnd I will keep doing everything in my power to crack down on gun trafficking and\n", + "ghost guns you can buy online and make at home—they have no serial numbers and can’t be traced. \\n\\nAnd I ask\n", + "Congress to pass proven measures to reduce gun violence. Pass universal background checks. Why should anyone\n", + "on a terrorist list be able to purchase a weapon? \\n\\nBan assault weapons and high-capacity magazines.\n", + "\\n\\nRepeal the liability shield that makes gun manufacturers the only industry in America that can’t be sued.\n", + "\\n\\nThese laws don’t infringe on the Second Amendment. They save lives. \\n\\nThe most fundamental right in\n", + "America is the right to vote – and to have it counted. And it’s under assault. \\n\\nIn state after state, new\n", + "laws have been passed, not only to suppress the vote, but to subvert entire elections. \\n\\nWe cannot let this\n", + "happen. \\n\\nTonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights\n", + "Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections.\n", + "\\n\\nTonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen\n", + "Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court.\n", + "Justice Breyer, thank you for your service. \\n\\nOne of the most serious constitutional responsibilities a\n", + "President has is nominating someone to serve on the United States Supreme Court. \\n\\nAnd I did that 4 days\n", + "ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal\n", + "minds, who will continue Justice Breyer’s legacy of excellence. \\n\\nA former top litigator in private\n", + "practice. A former federal public defender. 
And from a family of public school educators and police officers.\n", + "A consensus builder. Since she’s been nominated, she’s received a broad range of support—from the Fraternal\n", + "Order of Police to former judges appointed by Democrats and Republicans. \\n\\nAnd if we are to advance liberty\n", + "and justice, we need to secure the Border and fix the immigration system. \\n\\nWe can do both. At our border,\n", + "we’ve installed new technology like cutting-edge scanners to better detect drug smuggling. \\n\\nWe’ve set up\n", + "joint patrols with Mexico and Guatemala to catch more human traffickers. \\n\\nWe’re putting in place dedicated\n", + "immigration judges so families fleeing persecution and violence can have their cases heard faster. \\n\\nWe’re\n", + "securing commitments and supporting partners in South and Central America to host more refugees and secure\n", + "their own borders. \\n\\nWe can do all this while keeping lit the torch of liberty that has led generations of\n", + "immigrants to this land—my forefathers and so many of yours. \\n\\nProvide a pathway to citizenship for\n", + "Dreamers, those on temporary status, farm workers, and essential workers. \\n\\nRevise our laws so businesses\n", + "have the workers they need and families don’t wait decades to reunite. \\n\\nIt’s not only the right thing to\n", + "do—it’s the economically smart thing to do. \\n\\nThat’s why immigration reform is supported by everyone from\n", + "labor unions to religious leaders to the U.S. Chamber of Commerce. \\n\\nLet’s get it done once and for all.\n", + "\\n\\nAdvancing liberty and justice also requires protecting the rights of women. \\n\\nThe constitutional right\n", + "affirmed in Roe v. Wade—standing precedent for half a century—is under attack as never before. \\n\\nIf we want\n", + "to go forward—not backward—we must protect access to health care. Preserve a woman’s right to choose. And\n", + "let’s continue to advance maternal health care in America. \\n\\nAnd for our LGBTQ+ Americans, let’s finally get\n", + "the bipartisan Equality Act to my desk. The onslaught of state laws targeting transgender Americans and their\n", + "families is wrong. \\n\\nAs I said last year, especially to our younger transgender Americans, I will always\n", + "have your back as your President, so you can be yourself and reach your God-given potential. \\n\\nWhile it\n", + "often appears that we never agree, that isn’t true. I signed 80 bipartisan bills into law last year. From\n", + "preventing government shutdowns to protecting Asian-Americans from still-too-common hate crimes to reforming\n", + "military justice. \\n\\nAnd soon, we’ll strengthen the Violence Against Women Act that I first wrote three\n", + "decades ago. It is important for us to show the nation that we can come together and do big things. \\n\\nSo\n", + "tonight I’m offering a Unity Agenda for the Nation. Four big things we can do together. \\n\\nFirst, beat the\n", + "opioid epidemic. \\n\\nThere is so much we can do. Increase funding for prevention, treatment, harm reduction,\n", + "and recovery. \\n\\nGet rid of outdated rules that stop doctors from prescribing treatments. And stop the flow\n", + "of illicit drugs by working with state and local law enforcement to go after traffickers. \\n\\nIf you’re\n", + "suffering from addiction, know you are not alone. I believe in recovery, and I celebrate the 23 million\n", + "Americans in recovery. \\n\\nSecond, let’s take on mental health. 
Especially among our children, whose lives and\n", + "education have been turned upside down. \\n\\nThe American Rescue Plan gave schools money to hire teachers and\n", + "help students make up for lost learning. \\n\\nI urge every parent to make sure your school does just that. And\n", + "we can all play a part—sign up to be a tutor or a mentor. \\n\\nChildren were also struggling before the\n", + "pandemic. Bullying, violence, trauma, and the harms of social media. \\n\\nAs Frances Haugen, who is here with\n", + "us tonight, has shown, we must hold social media platforms accountable for the national experiment they’re\n", + "conducting on our children for profit. \\n\\nIt’s time to strengthen privacy protections, ban targeted\n", + "advertising to children, demand tech companies stop collecting personal data on our children. \\n\\nAnd let’s\n", + "get all Americans the mental health services they need. More people they can turn to for help, and full parity\n", + "between physical and mental health care. \\n\\nThird, support our veterans. \\n\\nVeterans are the best of us.\n", + "\\n\\nI’ve always believed that we have a sacred obligation to equip all those we send to war and care for them\n", + "and their families when they come home. \\n\\nMy administration is providing assistance with job training and\n", + "housing, and now helping lower-income veterans get VA care debt-free. \\n\\nOur troops in Iraq and Afghanistan\n", + "faced many dangers. \\n\\nOne was stationed at bases and breathing in toxic smoke from “burn pits” that\n", + "incinerated wastes of war—medical and hazard material, jet fuel, and more. \\n\\nWhen they came home, many of\n", + "the world’s fittest and best trained warriors were never the same. \\n\\nHeadaches. Numbness. Dizziness. \\n\\nA\n", + "cancer that would put them in a flag-draped coffin. \\n\\nI know. \\n\\nOne of those soldiers was my son Major\n", + "Beau Biden. \\n\\nWe don’t know for sure if a burn pit was the cause of his brain cancer, or the diseases of so\n", + "many of our troops. \\n\\nBut I’m committed to finding out everything we can. \\n\\nCommitted to military families\n", + "like Danielle Robinson from Ohio. \\n\\nThe widow of Sergeant First Class Heath Robinson. \\n\\nHe was born a\n", + "soldier. Army National Guard. Combat medic in Kosovo and Iraq. \\n\\nStationed near Baghdad, just yards from\n", + "burn pits the size of football fields. \\n\\nHeath’s widow Danielle is here with us tonight. They loved going to\n", + "Ohio State football games. He loved building Legos with their daughter. \\n\\nBut cancer from prolonged exposure\n", + "to burn pits ravaged Heath’s lungs and body. \\n\\nDanielle says Heath was a fighter to the very end. \\n\\nHe\n", + "didn’t know how to stop fighting, and neither did she. \\n\\nThrough her pain she found purpose to demand we do\n", + "better. \\n\\nTonight, Danielle—we are. \\n\\nThe VA is pioneering new ways of linking toxic exposures to\n", + "diseases, already helping more veterans get benefits. \\n\\nAnd tonight, I’m announcing we’re expanding\n", + "eligibility to veterans suffering from nine respiratory cancers. \\n\\nI’m also calling on Congress: pass a law\n", + "to make sure veterans devastated by toxic exposures in Iraq and Afghanistan finally get the benefits and\n", + "comprehensive health care they deserve. \\n\\nAnd fourth, let’s end cancer as we know it. \\n\\nThis is personal\n", + "to me and Jill, to Kamala, and to so many of you. 
\\n\\nCancer is the #2 cause of death in America–second only\n", + "to heart disease. \\n\\nLast month, I announced our plan to supercharge \\nthe Cancer Moonshot that President\n", + "Obama asked me to lead six years ago. \\n\\nOur goal is to cut the cancer death rate by at least 50% over the\n", + "next 25 years, turn more cancers from death sentences into treatable diseases. \\n\\nMore support for patients\n", + "and families. \\n\\nTo get there, I call on Congress to fund ARPA-H, the Advanced Research Projects Agency for\n", + "Health. \\n\\nIt’s based on DARPA—the Defense Department project that led to the Internet, GPS, and so much\n", + "more. \\n\\nARPA-H will have a singular purpose—to drive breakthroughs in cancer, Alzheimer’s, diabetes, and\n", + "more. \\n\\nA unity agenda for the nation. \\n\\nWe can do this. \\n\\nMy fellow Americans—tonight , we have\n", + "gathered in a sacred space—the citadel of our democracy. \\n\\nIn this Capitol, generation after generation,\n", + "Americans have debated great questions amid great strife, and have done great things. \\n\\nWe have fought for\n", + "freedom, expanded liberty, defeated totalitarianism and terror. \\n\\nAnd built the strongest, freest, and most\n", + "prosperous nation the world has ever known. \\n\\nNow is the hour. \\n\\nOur moment of responsibility. \\n\\nOur\n", + "test of resolve and conscience, of history itself. \\n\\nIt is in this moment that our character is formed. Our\n", + "purpose is found. Our future is forged. \\n\\nWell I know this nation. \\n\\nWe will meet the test. \\n\\nTo\n", + "protect freedom and liberty, to expand fairness and opportunity. \\n\\nWe will save democracy. \\n\\nAs hard as\n", + "these times have been, I am more optimistic about America today than I have been my whole life. \\n\\nBecause I\n", + "see the future that is within our grasp. \\n\\nBecause I know there is simply nothing beyond our capacity.\n", + "\\n\\nWe are the only nation on Earth that has always turned every crisis we have faced into an opportunity.\n", + "\\n\\nThe only nation that can be defined by a single word: possibilities. \\n\\nSo on this night, in our 245th\n", + "year as a nation, I have come to report on the State of the Union. \\n\\nAnd my report is this: the State of the\n", + "Union is strong—because you, the American people, are strong. \\n\\nWe are stronger today than we were a year\n", + "ago. \\n\\nAnd we will be stronger a year from now than we are today. \\n\\nNow is our moment to meet and overcome\n", + "the challenges of our time. \\n\\nAnd we will, as one people. \\n\\nOne America. \\n\\nThe United States of America.\n", + "\\n\\nMay God bless you all. May God protect our troops.' 
metadata={'source': './state_of_the_union.txt'}\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "mLO2H5x7muua" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Text Splitter\n", + "from langchain.text_splitter import CharacterTextSplitter\n", + "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n", + "docs = text_splitter.split_documents(documents)" + ], + "metadata": { + "id": "_qXNpk8vXlys" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "len(docs)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "gEUp4OtxnuXL", + "outputId": "544790bb-06e2-439f-e74d-82011d86c5f5" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "42" + ] + }, + "metadata": {}, + "execution_count": 111 + } + ] + }, + { + "cell_type": "code", + "source": [ + "docs[0]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "z_HUKypknucr", + "outputId": "9f999ded-88e5-4742-bb0b-c02d9915f60b" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "Document(page_content='Madam Speaker, Madam Vice President, our First Lady and Second Gentleman. Members of Congress and the Cabinet. Justices of the Supreme Court. My fellow Americans. \\n\\nLast year COVID-19 kept us apart. This year we are finally together again. \\n\\nTonight, we meet as Democrats Republicans and Independents. But most importantly as Americans. \\n\\nWith a duty to one another to the American people to the Constitution. \\n\\nAnd with an unwavering resolve that freedom will always triumph over tyranny. \\n\\nSix days ago, Russia’s Vladimir Putin sought to shake the foundations of the free world thinking he could make it bend to his menacing ways. But he badly miscalculated. \\n\\nHe thought he could roll into Ukraine and the world would roll over. Instead he met a wall of strength he never imagined. \\n\\nHe met the Ukrainian people. 
\\n\\nFrom President Zelenskyy to every Ukrainian, their fearlessness, their courage, their determination, inspires the world.', metadata={'source': './state_of_the_union.txt'})" + ] + }, + "metadata": {}, + "execution_count": 112 + } + ] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "jkfe4cA3aQqi" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "### Embeddings" + ], + "metadata": { + "id": "G6HWzhWUaRIb" + } + }, + { + "cell_type": "code", + "source": [ + "# Embeddings\n", + "from langchain.embeddings import HuggingFaceEmbeddings\n", + "\n", + "embeddings = HuggingFaceEmbeddings()" + ], + "metadata": { + "id": "uctYbK6YXmxI" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "!pip install faiss-cpu" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "_wS3_6gbXqvE", + "outputId": "75702f82-ab73-41ac-e0dc-d74f62460bea" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", + "Requirement already satisfied: faiss-cpu in /usr/local/lib/python3.9/dist-packages (1.7.3)\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "264Et_-RXuv4" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Vectorstore: https://python.langchain.com/en/latest/modules/indexes/vectorstores.html\n", + "from langchain.vectorstores import FAISS\n", + "\n", + "db = FAISS.from_documents(docs, embeddings)\n", + "\n" + ], + "metadata": { + "id": "TTg8T2tJXvTI" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "query = \"What did the president say about the Supreme Court\"\n", + "docs = db.similarity_search(query)" + ], + "metadata": { + "id": "IYQOej1MpiH2" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "print(wrap_text_preserve_newlines(str(docs[0].page_content)))" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "v7li6KoVpiQh", + "outputId": "4745c9f2-b6f9-4295-a56a-9dc7306f72ad" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And\n", + "while you’re at it, pass the Disclose Act so Americans can know who is funding our elections.\n", + "\n", + "Tonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an\n", + "Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer,\n", + "thank you for your service.\n", + "\n", + "One of the most serious constitutional responsibilities a President has is nominating someone to serve on the\n", + "United States Supreme Court.\n", + "\n", + "And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. 
One of our\n", + "nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "tsSGNm8UdsX3" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "### Create QA Chain" + ], + "metadata": { + "id": "PzWUbpH6bQ2l" + } + }, + { + "cell_type": "code", + "source": [ + "from langchain.chains.question_answering import load_qa_chain\n", + "from langchain import HuggingFaceHub\n" + ], + "metadata": { + "id": "rzJnHsJ_bThf" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "llm=HuggingFaceHub(repo_id=\"google/flan-t5-xl\", model_kwargs={\"temperature\":0, \"max_length\":512})" + ], + "metadata": { + "id": "_IZn8b8Aci4w" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "chain = load_qa_chain(llm, chain_type=\"stuff\")" + ], + "metadata": { + "id": "X2IMgDRGsVV4" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "query = \"What did the president say about the Supreme Court\"\n", + "docs = db.similarity_search(query)\n", + "chain.run(input_documents=docs, question=query)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 52 + }, + "id": "aemehFTEsVZG", + "outputId": "11f7d225-8232-43f5-a99e-2303a22c0fdc" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "'One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court'" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "string" + } + }, + "metadata": {}, + "execution_count": 126 + } + ] + }, + { + "cell_type": "code", + "source": [ + "query = \"What did the president say about economy?\"\n", + "docs = db.similarity_search(query)\n", + "chain.run(input_documents=docs, question=query)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 35 + }, + "id": "SVOXzjaNsVb8", + "outputId": "b3ebaea1-3031-4484-e827-ef63f96c1def" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "'Build the economy from the bottom up and the middle out, not from the top down'" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "string" + } + }, + "metadata": {}, + "execution_count": 127 + } + ] + },
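+ {
+ "cell_type": "markdown",
+ "source": [
+ "#### Optional: inspect retrieval scores\n",
+ "A small sanity check on what the vectorstore is feeding the chain. This is a sketch that assumes the FAISS wrapper in this LangChain version exposes `similarity_search_with_score` (scores are L2 distances, so lower means closer); `k=2` is an illustrative choice."
+ ],
+ "metadata": {}
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Hedged sketch: FAISS can also return a distance per retrieved chunk,\n",
+ "# which helps verify the top matches are actually on-topic before\n",
+ "# blaming the LLM for a bad answer. Assumes similarity_search_with_score\n",
+ "# is available on this FAISS wrapper; k=2 is illustrative.\n",
+ "for doc, score in db.similarity_search_with_score(query, k=2):\n",
+ "    print(round(float(score), 3), doc.page_content[:120])"
+ ],
+ "metadata": {},
+ "execution_count": null,
+ "outputs": []
+ },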
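+ {
+ "cell_type": "markdown",
+ "source": [
+ "#### Optional: the same QA flow as a RetrievalQA chain\n",
+ "A minimal sketch that folds the manual `similarity_search` + `load_qa_chain` steps into a single `RetrievalQA` chain, so retrieval happens inside the chain. It assumes the installed LangChain version provides `RetrievalQA` and `VectorStore.as_retriever()`; the `k` value is an illustrative choice."
+ ],
+ "metadata": {}
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Hedged sketch: wrap the FAISS index in a retriever and let the chain\n",
+ "# fetch the top-k chunks itself (assumes RetrievalQA and as_retriever\n",
+ "# exist in the installed LangChain version).\n",
+ "from langchain.chains import RetrievalQA\n",
+ "\n",
+ "retriever = db.as_retriever(search_kwargs={\"k\": 4})  # top-4 chunks per query\n",
+ "qa = RetrievalQA.from_chain_type(llm=llm, chain_type=\"stuff\", retriever=retriever)\n",
+ "qa.run(\"What did the president say about the Supreme Court\")"
+ ],
+ "metadata": {},
+ "execution_count": null,
+ "outputs": []
+ },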
"application/vnd.google.colaboratory.intrinsic+json": { + "type": "string" + } + }, + "metadata": {}, + "execution_count": 109 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "### Working with PDF Files" + ], + "metadata": { + "id": "MNdVzDaNe39j" + } + }, + { + "cell_type": "code", + "source": [ + "!pip install unstructured\n", + "!pip install chromadb\n", + "!pip install Cython\n", + "!pip install tiktoken\n", + "!pip install unstructured[local-inference]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "0NDARXbveTcI", + "outputId": "154035b6-df10-44d3-c53a-7831b7625494" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", + "Requirement already satisfied: unstructured in /usr/local/lib/python3.9/dist-packages (0.5.12)\n", + "Requirement already satisfied: openpyxl in /usr/local/lib/python3.9/dist-packages (from unstructured) (3.0.10)\n", + "Requirement already satisfied: python-docx in /usr/local/lib/python3.9/dist-packages (from unstructured) (0.8.11)\n", + "Requirement already satisfied: lxml in /usr/local/lib/python3.9/dist-packages (from unstructured) (4.9.2)\n", + "Requirement already satisfied: pillow in /usr/local/lib/python3.9/dist-packages (from unstructured) (9.5.0)\n", + "Requirement already satisfied: nltk in /usr/local/lib/python3.9/dist-packages (from unstructured) (3.8.1)\n", + "Requirement already satisfied: python-pptx in /usr/local/lib/python3.9/dist-packages (from unstructured) (0.6.21)\n", + "Requirement already satisfied: markdown in /usr/local/lib/python3.9/dist-packages (from unstructured) (3.4.3)\n", + "Requirement already satisfied: pypandoc in /usr/local/lib/python3.9/dist-packages (from unstructured) (1.11)\n", + "Requirement already satisfied: pandas in /usr/local/lib/python3.9/dist-packages (from unstructured) (1.5.3)\n", + "Requirement already satisfied: python-magic in /usr/local/lib/python3.9/dist-packages (from unstructured) (0.4.27)\n", + "Requirement already satisfied: requests in /usr/local/lib/python3.9/dist-packages (from unstructured) (2.28.2)\n", + "Requirement already satisfied: msg-parser in /usr/local/lib/python3.9/dist-packages (from unstructured) (1.2.0)\n", + "Requirement already satisfied: argilla in /usr/local/lib/python3.9/dist-packages (from unstructured) (1.6.0)\n", + "Requirement already satisfied: certifi>=2022.12.07 in /usr/local/lib/python3.9/dist-packages (from unstructured) (2022.12.7)\n", + "Requirement already satisfied: deprecated~=1.2.0 in /usr/local/lib/python3.9/dist-packages (from argilla->unstructured) (1.2.13)\n", + "Requirement already satisfied: rich<=13.0.1 in /usr/local/lib/python3.9/dist-packages (from argilla->unstructured) (13.0.1)\n", + "Requirement already satisfied: monotonic in /usr/local/lib/python3.9/dist-packages (from argilla->unstructured) (1.6)\n", + "Requirement already satisfied: wrapt<1.15,>=1.13 in /usr/local/lib/python3.9/dist-packages (from argilla->unstructured) (1.14.1)\n", + "Requirement already satisfied: tqdm>=4.27.0 in /usr/local/lib/python3.9/dist-packages (from argilla->unstructured) (4.65.0)\n", + "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.9/dist-packages (from argilla->unstructured) (23.0)\n", + "Requirement already satisfied: numpy<1.24.0 in /usr/local/lib/python3.9/dist-packages (from argilla->unstructured) (1.22.4)\n", + "Requirement already 
satisfied: pydantic>=1.7.1 in /usr/local/lib/python3.9/dist-packages (from argilla->unstructured) (1.10.7)\n", + "(remaining pip dependency-resolution output truncated: chromadb, Cython, tiktoken, and unstructured[local-inference] and their dependencies were likewise already satisfied)\n"
 + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "import os\n", + "\n", + "from langchain.document_loaders import UnstructuredPDFLoader\n", + "from langchain.embeddings import HuggingFaceEmbeddings\n", + "from langchain.indexes import VectorstoreIndexCreator\n", + "from langchain.llms import HuggingFaceHub\n", + "from langchain.text_splitter import CharacterTextSplitter" + ], + "metadata": { + "id": "juVVrXsUfybx" + }, + "execution_count": null, + "outputs": [] + },
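 + { + "cell_type": "markdown", + "source": [ + "`HuggingFaceHub` below calls the hosted Hugging Face Inference API, so it expects an API token in the `HUGGINGFACEHUB_API_TOKEN` environment variable. A minimal sketch (the token value is a placeholder you must replace with your own):" + ], + "metadata": { + "id": "hfTokenNote01" + } + }, + { + "cell_type": "code", + "source": [ + "# Sketch: langchain's HuggingFaceHub wrapper reads the token from this env var.\n", + "# 'hf_...' is a placeholder; paste your own token and avoid committing it.\n", + "os.environ['HUGGINGFACEHUB_API_TOKEN'] = 'hf_...'" + ], + "metadata": { + "id": "hfTokenCell01" + }, + "execution_count": null, + "outputs": [] + },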
"base_uri": "https://localhost:8080/" + }, + "id": "3z1YygkktO12", + "outputId": "a22cac2c-674a-4113-9b6b-5f41f69a10d3" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Mounted at /content/gdrive\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "['2023_GPT4All_Technical_Report.pdf', '2008.10010.pdf']" + ] + }, + "metadata": {}, + "execution_count": 129 + } + ] + }, + { + "cell_type": "code", + "source": [ + "loaders = [UnstructuredPDFLoader(os.path.join(pdf_folder_path, fn)) for fn in os.listdir(pdf_folder_path)]\n", + "loaders" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "P5lRcsUWtO42", + "outputId": "1f95d0dc-229a-48be-eec1-3ec042a17ab5" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[,\n", + " ]" + ] + }, + "metadata": {}, + "execution_count": 130 + } + ] + }, + { + "cell_type": "code", + "source": [ + "index = VectorstoreIndexCreator(\n", + " embedding=HuggingFaceEmbeddings(),\n", + " text_splitter=CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)).from_loaders(loaders)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "tBsI8myktO8Y", + "outputId": "6aa8eaff-3576-4a0e-8fb5-732a2dbc4485" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "WARNING:unstructured:detectron2 is not installed. Cannot use the hi_res partitioning strategy. Falling back to partitioning with the fast strategy.\n", + "WARNING:unstructured:detectron2 is not installed. Cannot use the hi_res partitioning strategy. Falling back to partitioning with the fast strategy.\n", + "WARNING:chromadb:Using embedded DuckDB without persistence: data will be transient\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "llm=HuggingFaceHub(repo_id=\"google/flan-t5-xl\", model_kwargs={\"temperature\":0, \"max_length\":512})" + ], + "metadata": { + "id": "7py0d3o5tO_h" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "from langchain.chains import RetrievalQA\n", + "chain = RetrievalQA.from_chain_type(llm=llm,\n", + " chain_type=\"stuff\",\n", + " retriever=index.vectorstore.as_retriever(),\n", + " input_key=\"question\")" + ], + "metadata": { + "id": "31lq4mOdtPDL" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "chain.run('How was the GPT4all model trained?')" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 35 + }, + "id": "19VMjTc_tPKB", + "outputId": "7874458b-018e-40e1-ab78-f6b6ec8bcab2" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "'LoRA'" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "string" + } + }, + "metadata": {}, + "execution_count": 135 + } + ] + }, + { + "cell_type": "code", + "source": [ + "chain.run('Who are the authors of GPT4all technical report?')" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 35 + }, + "id": "biOFwTy4tQBE", + "outputId": "00e24163-4183-4201-d108-2180c76d57a9" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "'Yuvanesh Anand'" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "string" + } + }, + "metadata": {}, + 
"execution_count": 136 + } + ] + }, + { + "cell_type": "code", + "source": [ + "chain.run('What is the model size of GPT4all?')" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 35 + }, + "id": "MJ_1DvnDgb_X", + "outputId": "2a06984c-4158-4409-d233-56c9f10e13a9" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "'quantized 4-bit versions of the model allowing virtually anyone to run the model on CPU'" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "string" + } + }, + "metadata": {}, + "execution_count": 92 + } + ] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "eSbmtRTJj8Gf" + }, + "execution_count": null, + "outputs": [] + } + ] +} \ No newline at end of file diff --git a/architects_of_future/FineTuned/Zephyrfinetune.ipynb b/architects_of_future/FineTuned/Zephyrfinetune.ipynb new file mode 100644 index 00000000..1d8d80f3 --- /dev/null +++ b/architects_of_future/FineTuned/Zephyrfinetune.ipynb @@ -0,0 +1,2222 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "bbfd67637ba742d8ae56be1038be586c": { + "model_module": "@jupyter-widgets/controls", + "model_name": "VBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "VBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "VBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_37dd96e35ec9497da5b9f757da532f1f", + "IPY_MODEL_e1fa2caf4c704025b71e4ade36229e7d", + "IPY_MODEL_adfc899f4e7b41e7b7acaefc0f85a83a", + "IPY_MODEL_7a7a3bbe0ac14cd08a693b6692bbd453" + ], + "layout": "IPY_MODEL_898223bc221d469598a42df848188698" + } + }, + "8b2ed548635e40baa8f5f749aef3d796": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_ac9262fd707a47919578a439c09df637", + "placeholder": "​", + "style": "IPY_MODEL_4085671224e040efb6582673f7547f95", + "value": "

Copy a token from your Hugging Face\ntokens page and paste it below.
Immediately click login after copying\nyour token or it might be stored in plain text in this notebook file.
" + } + }, + "29bf90ded9104c3c828d7ac3580ad5ad": { + "model_module": "@jupyter-widgets/controls", + "model_name": "PasswordModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "PasswordModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "PasswordView", + "continuous_update": true, + "description": "Token:", + "description_tooltip": null, + "disabled": false, + "layout": "IPY_MODEL_5b4d7c9fff324fa2a27ce6660836d4e9", + "placeholder": "​", + "style": "IPY_MODEL_646137eec2ec46b68327f48bc8e9aa08", + "value": "" + } + }, + "6db4f1a7811845a8866d373c2a01990b": { + "model_module": "@jupyter-widgets/controls", + "model_name": "CheckboxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "CheckboxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "CheckboxView", + "description": "Add token as git credential?", + "description_tooltip": null, + "disabled": false, + "indent": true, + "layout": "IPY_MODEL_99a9cab465a3467e8860db4603a27876", + "style": "IPY_MODEL_4123fd49345247f2b3d6ea0490c4f5b2", + "value": true + } + }, + "4308d435255a432d9c5cfecc4fb3cf0b": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ButtonModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ButtonModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ButtonView", + "button_style": "", + "description": "Login", + "disabled": false, + "icon": "", + "layout": "IPY_MODEL_05ef30d7ef694c3aa68665aeb94cab2e", + "style": "IPY_MODEL_a902d903ea29486994ff31765ef96717", + "tooltip": "" + } + }, + "65b7c985d0d14555b904093b56ee825c": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_1b6eec4b9db847a2a4b9b849f39bb4cf", + "placeholder": "​", + "style": "IPY_MODEL_fc2b32e5aee342bf88e88d90c72042a4", + "value": "\nPro Tip: If you don't already have one, you can create a dedicated\n'notebooks' token with 'write' access, that you can then easily reuse for all\nnotebooks. 
" + } + }, + "898223bc221d469598a42df848188698": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": "center", + "align_self": null, + "border": null, + "bottom": null, + "display": "flex", + "flex": null, + "flex_flow": "column", + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": "50%" + } + }, + "ac9262fd707a47919578a439c09df637": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "4085671224e040efb6582673f7547f95": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "5b4d7c9fff324fa2a27ce6660836d4e9": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + 
"grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "646137eec2ec46b68327f48bc8e9aa08": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "99a9cab465a3467e8860db4603a27876": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "4123fd49345247f2b3d6ea0490c4f5b2": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "05ef30d7ef694c3aa68665aeb94cab2e": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + 
"grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "a902d903ea29486994ff31765ef96717": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ButtonStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ButtonStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "button_color": null, + "font_weight": "" + } + }, + "1b6eec4b9db847a2a4b9b849f39bb4cf": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "fc2b32e5aee342bf88e88d90c72042a4": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "2119aa0864e54dcda4c1a67175dac67b": { + "model_module": "@jupyter-widgets/controls", + "model_name": "LabelModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "LabelModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "LabelView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_4fed19474855489b963de9da73f403aa", + "placeholder": "​", + "style": "IPY_MODEL_ec6955a2510848ecbec1162d2252fe2a", + "value": "Connecting..." 
+ } + }, + "4fed19474855489b963de9da73f403aa": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "ec6955a2510848ecbec1162d2252fe2a": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "37dd96e35ec9497da5b9f757da532f1f": { + "model_module": "@jupyter-widgets/controls", + "model_name": "LabelModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "LabelModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "LabelView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_bd844eac571e4305b062538f85661a64", + "placeholder": "​", + "style": "IPY_MODEL_1dae3ec7caa84dcb8a379f7c68dc82fa", + "value": "Token is valid (permission: write)." + } + }, + "e1fa2caf4c704025b71e4ade36229e7d": { + "model_module": "@jupyter-widgets/controls", + "model_name": "LabelModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "LabelModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "LabelView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_090d11f5153a49ccafabd84bae1020c9", + "placeholder": "​", + "style": "IPY_MODEL_fc4243c627ae49cf94382ad007bafeea", + "value": "Your token has been saved in your configured git credential helpers (store)." 
+ } + }, + "adfc899f4e7b41e7b7acaefc0f85a83a": { + "model_module": "@jupyter-widgets/controls", + "model_name": "LabelModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "LabelModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "LabelView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_f2bff279bdbc40cca115e5102f698663", + "placeholder": "​", + "style": "IPY_MODEL_410933c40e854ae788e893478e75530d", + "value": "Your token has been saved to /root/.cache/huggingface/token" + } + }, + "7a7a3bbe0ac14cd08a693b6692bbd453": { + "model_module": "@jupyter-widgets/controls", + "model_name": "LabelModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "LabelModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "LabelView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_2b51afbce30840f7ae474a8b6ad45eb6", + "placeholder": "​", + "style": "IPY_MODEL_e2ed47cdfc934ec1bbf98946e9f2c65f", + "value": "Login successful" + } + }, + "bd844eac571e4305b062538f85661a64": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "1dae3ec7caa84dcb8a379f7c68dc82fa": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "090d11f5153a49ccafabd84bae1020c9": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + 
"align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "fc4243c627ae49cf94382ad007bafeea": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "f2bff279bdbc40cca115e5102f698663": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "410933c40e854ae788e893478e75530d": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "2b51afbce30840f7ae474a8b6ad45eb6": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, 
+ "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "e2ed47cdfc934ec1bbf98946e9f2c65f": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + } + } + } + }, + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# Installing Dependencies" + ], + "metadata": { + "id": "Zi65RoJIkMKI" + } + }, + { + "cell_type": "code", + "source": [ + "! pip install datasets transformers trl peft accelerate bitsandbytes auto-gptq optimum modin" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "pwMj-8TYjcw6", + "outputId": "2915aa3b-7fcb-4e41-8b03-957e79b59221" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Requirement already satisfied: datasets in /usr/local/lib/python3.10/dist-packages (2.15.0)\n", + "Requirement already satisfied: transformers in /usr/local/lib/python3.10/dist-packages (4.35.2)\n", + "Requirement already satisfied: trl in /usr/local/lib/python3.10/dist-packages (0.7.4)\n", + "Requirement already satisfied: peft in /usr/local/lib/python3.10/dist-packages (0.6.2)\n", + "Requirement already satisfied: accelerate in /usr/local/lib/python3.10/dist-packages (0.25.0)\n", + "Requirement already satisfied: bitsandbytes in /usr/local/lib/python3.10/dist-packages (0.41.2.post2)\n", + "Requirement already satisfied: auto-gptq in /usr/local/lib/python3.10/dist-packages (0.5.1)\n", + "Requirement already satisfied: optimum in /usr/local/lib/python3.10/dist-packages (1.14.1)\n", + "Requirement already satisfied: modin in /usr/local/lib/python3.10/dist-packages (0.25.1)\n", + "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from datasets) (1.23.5)\n", + "Requirement already satisfied: pyarrow>=8.0.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (9.0.0)\n", + "Requirement already satisfied: pyarrow-hotfix in /usr/local/lib/python3.10/dist-packages (from datasets) (0.6)\n", + "Requirement already satisfied: dill<0.3.8,>=0.3.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (0.3.7)\n", + "Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from datasets) (2.1.3)\n", + "Requirement already satisfied: requests>=2.19.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (2.31.0)\n", + "Requirement already satisfied: tqdm>=4.62.1 in /usr/local/lib/python3.10/dist-packages (from datasets) (4.66.1)\n", + "Requirement already satisfied: xxhash in /usr/local/lib/python3.10/dist-packages (from datasets) (3.4.1)\n", + "Requirement already satisfied: 
multiprocess in /usr/local/lib/python3.10/dist-packages (from datasets) (0.70.15)\n", + "Requirement already satisfied: fsspec[http]<=2023.10.0,>=2023.1.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (2023.6.0)\n", + "Requirement already satisfied: aiohttp in /usr/local/lib/python3.10/dist-packages (from datasets) (3.9.1)\n", + "Requirement already satisfied: huggingface-hub>=0.18.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (0.19.4)\n", + "Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from datasets) (23.2)\n", + "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from datasets) (6.0.1)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from transformers) (3.13.1)\n", + "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers) (2023.6.3)\n", + "Requirement already satisfied: tokenizers<0.19,>=0.14 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.15.0)\n", + "Requirement already satisfied: safetensors>=0.3.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.4.1)\n", + "Requirement already satisfied: torch>=1.4.0 in /usr/local/lib/python3.10/dist-packages (from trl) (2.1.0+cu118)\n", + "Requirement already satisfied: tyro>=0.5.11 in /usr/local/lib/python3.10/dist-packages (from trl) (0.6.0)\n", + "Requirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from peft) (5.9.5)\n", + "Requirement already satisfied: sentencepiece in /usr/local/lib/python3.10/dist-packages (from auto-gptq) (0.1.99)\n", + "Requirement already satisfied: rouge in /usr/local/lib/python3.10/dist-packages (from auto-gptq) (1.0.1)\n", + "Requirement already satisfied: gekko in /usr/local/lib/python3.10/dist-packages (from auto-gptq) (1.0.6)\n", + "Requirement already satisfied: coloredlogs in /usr/local/lib/python3.10/dist-packages (from optimum) (15.0.1)\n", + "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from optimum) (1.12)\n", + "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (23.1.0)\n", + "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (6.0.4)\n", + "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.9.3)\n", + "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.4.0)\n", + "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.3.1)\n", + "Requirement already satisfied: async-timeout<5.0,>=4.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (4.0.3)\n", + "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.18.0->datasets) (4.5.0)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets) (2.8.2)\n", + "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets) (2023.3.post1)\n", + "Requirement already satisfied: tzdata>=2022.1 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets) (2023.3)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in 
/usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->datasets) (3.3.2)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->datasets) (3.6)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->datasets) (2.0.7)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->datasets) (2023.11.17)\n", + "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch>=1.4.0->trl) (3.2.1)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch>=1.4.0->trl) (3.1.2)\n", + "Requirement already satisfied: triton==2.1.0 in /usr/local/lib/python3.10/dist-packages (from torch>=1.4.0->trl) (2.1.0)\n", + "Requirement already satisfied: protobuf in /usr/local/lib/python3.10/dist-packages (from transformers) (3.20.3)\n", + "Requirement already satisfied: docstring-parser>=0.14.1 in /usr/local/lib/python3.10/dist-packages (from tyro>=0.5.11->trl) (0.15)\n", + "Requirement already satisfied: rich>=11.1.0 in /usr/local/lib/python3.10/dist-packages (from tyro>=0.5.11->trl) (13.7.0)\n", + "Requirement already satisfied: shtab>=1.5.6 in /usr/local/lib/python3.10/dist-packages (from tyro>=0.5.11->trl) (1.6.5)\n", + "Requirement already satisfied: humanfriendly>=9.1 in /usr/local/lib/python3.10/dist-packages (from coloredlogs->optimum) (10.0)\n", + "Requirement already satisfied: six in /usr/local/lib/python3.10/dist-packages (from rouge->auto-gptq) (1.16.0)\n", + "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->optimum) (1.3.0)\n", + "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.10/dist-packages (from rich>=11.1.0->tyro>=0.5.11->trl) (3.0.0)\n", + "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.10/dist-packages (from rich>=11.1.0->tyro>=0.5.11->trl) (2.16.1)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch>=1.4.0->trl) (2.1.3)\n", + "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.10/dist-packages (from markdown-it-py>=2.2.0->rich>=11.1.0->tyro>=0.5.11->trl) (0.1.2)\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "from huggingface_hub import notebook_login\n", + "notebook_login()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 145, + "referenced_widgets": [ + "bbfd67637ba742d8ae56be1038be586c", + "8b2ed548635e40baa8f5f749aef3d796", + "29bf90ded9104c3c828d7ac3580ad5ad", + "6db4f1a7811845a8866d373c2a01990b", + "4308d435255a432d9c5cfecc4fb3cf0b", + "65b7c985d0d14555b904093b56ee825c", + "898223bc221d469598a42df848188698", + "ac9262fd707a47919578a439c09df637", + "4085671224e040efb6582673f7547f95", + "5b4d7c9fff324fa2a27ce6660836d4e9", + "646137eec2ec46b68327f48bc8e9aa08", + "99a9cab465a3467e8860db4603a27876", + "4123fd49345247f2b3d6ea0490c4f5b2", + "05ef30d7ef694c3aa68665aeb94cab2e", + "a902d903ea29486994ff31765ef96717", + "1b6eec4b9db847a2a4b9b849f39bb4cf", + "fc2b32e5aee342bf88e88d90c72042a4", + "2119aa0864e54dcda4c1a67175dac67b", + "4fed19474855489b963de9da73f403aa", + "ec6955a2510848ecbec1162d2252fe2a", + "37dd96e35ec9497da5b9f757da532f1f", + "e1fa2caf4c704025b71e4ade36229e7d", + "adfc899f4e7b41e7b7acaefc0f85a83a", + "7a7a3bbe0ac14cd08a693b6692bbd453", + 
"bd844eac571e4305b062538f85661a64", + "1dae3ec7caa84dcb8a379f7c68dc82fa", + "090d11f5153a49ccafabd84bae1020c9", + "fc4243c627ae49cf94382ad007bafeea", + "f2bff279bdbc40cca115e5102f698663", + "410933c40e854ae788e893478e75530d", + "2b51afbce30840f7ae474a8b6ad45eb6", + "e2ed47cdfc934ec1bbf98946e9f2c65f" + ] + }, + "id": "biyFiZQWkBgx", + "outputId": "d81ac0a1-6de9-412d-a322-0bb226e6e40e" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "VBox(children=(HTML(value='
\n You are an AI-powered summarization assistant. Also use your own knowledge about Indian law and give the output accordingly.\n<|user|>\n\" + example[self.config.INSTRUCTION_FIELD] + \"\n<|assistant|>\n\" + example[self.config.TARGET_FIELD]\n",
+ "\n", + " return processed_example\n", + "\n", + " def create_dataset(self):\n", + "\n", + " '''\n", + " Downloads and processes the dataset\n", + "\n", + " Returns:\n", + " processed_data: Training-ready processed dataset\n", + " '''\n", + "\n", + " data = load_dataset(self.config.DATASET_ID, split=\"train\")\n", + "\n", + " print(\"\\n====================================================================\\n\")\n", + " print(\"\\t\\t\\tDOWNLOADED DATASET\")\n", + " print(\"\\n====================================================================\\n\")\n", + "\n", + " df = data.to_pandas()\n", + " print(df)\n", + " df[self.config.DATASET_TEXT_FIELD] = df[[self.config.INSTRUCTION_FIELD, self.config.TARGET_FIELD]].apply(lambda x: self.process_data_sample(x), axis=1)\n", + "\n", + " print(\"\\n====================================================================\\n\")\n", + " print(\"\\t\\t\\tPROCESSED DATASET\")\n", + "\n", + " print(\"\\n====================================================================\\n\")\n", + " print(df[[self.config.DATASET_TEXT_FIELD]])\n", + "\n", + " processed_data = Dataset.from_pandas(df[[self.config.DATASET_TEXT_FIELD]])\n", + " return processed_data\n", + "\n", + " def prepare_model(self):\n", + "\n", + " '''\n", + " Prepares the model for finetuning by quantizing it and attaching LoRA modules to it\n", + "\n", + " Returns:\n", + " model - Model ready for finetuning\n", + " peft_config - LoRA Adapter config\n", + " '''\n", + "\n", + " bnb_config = GPTQConfig(\n", + " bits=self.config.BITS,\n", + " disable_exllama=self.config.DISABLE_EXLLAMA,\n", + " tokenizer=self.tokenizer\n", + " )\n", + "\n", + " model = AutoModelForCausalLM.from_pretrained(\n", + " self.config.MODEL_ID,\n", + " quantization_config=bnb_config,\n", + " device_map=self.config.DEVICE_MAP\n", + " )\n", + "\n", + " print(\"\\n====================================================================\\n\")\n", + " print(\"\\t\\t\\tDOWNLOADED MODEL\")\n", + " print(\"\\n====================================================================\\n\")\n", + "\n", + " model.config.use_cache=self.config.USE_CACHE\n", + " model.config.pretraining_tp=1\n", + " model.gradient_checkpointing_enable()\n", + " model = prepare_model_for_kbit_training(model)\n", + "\n", + " print(\"\\n====================================================================\\n\")\n", + " print(\"\\t\\t\\tMODEL CONFIG UPDATED\")\n", + " print(\"\\n====================================================================\\n\")\n", + "\n", + " peft_config = LoraConfig(\n", + " r=self.config.LORA_R,\n", + " lora_alpha=self.config.LORA_ALPHA,\n", + " lora_dropout=self.config.LORA_DROPOUT,\n", + " bias=self.config.BIAS,\n", + " task_type=self.config.TASK_TYPE,\n", + " target_modules=self.config.TARGET_MODULES\n", + " )\n", + "\n", + " model = get_peft_model(model, peft_config)\n", + "\n", + " print(\"\\n====================================================================\\n\")\n", + " print(\"\\t\\t\\tPREPARED MODEL FOR FINETUNING\")\n", + " print(\"\\n====================================================================\\n\")\n", + "\n", + " return model, peft_config\n", + "\n", + " def set_training_arguments(self):\n", + "\n", + " '''\n", + " Sets the arguments for the training loop in TrainingArguments
class\n", + " '''\n", + "\n", + " training_arguments = TrainingArguments(\n", + " output_dir=self.config.OUTPUT_DIR,\n", + " per_device_train_batch_size=self.config.BATCH_SIZE,\n", + " gradient_accumulation_steps=self.config.GRAD_ACCUMULATION_STEPS,\n", + " optim=self.config.OPTIMIZER,\n", + " learning_rate=self.config.LR,\n", + " lr_scheduler_type=self.config.LR_SCHEDULER,\n", + " save_strategy=self.config.SAVE_STRATEGY,\n", + " logging_steps=self.config.LOGGING_STEPS,\n", + " num_train_epochs=self.config.NUM_TRAIN_EPOCHS,\n", + " max_steps=self.config.MAX_STEPS,\n", + " fp16=self.config.FP16,\n", + " push_to_hub=self.config.PUSH_TO_HUB\n", + " )\n", + "\n", + " return training_arguments\n", + "\n", + " def train(self):\n", + "\n", + " '''\n", + " Trains the model on the specified dataset in config\n", + " '''\n", + "\n", + " data = self.create_dataset()\n", + " model, peft_config = self.prepare_model()\n", + " training_args = self.set_training_arguments()\n", + "\n", + " print(\"\\n====================================================================\\n\")\n", + " print(\"\\t\\t\\tPREPARED FOR FINETUNING\")\n", + " print(\"\\n====================================================================\\n\")\n", + "\n", + " trainer = SFTTrainer(\n", + " model=model,\n", + " train_dataset=data,\n", + " peft_config=peft_config,\n", + " dataset_text_field=self.config.DATASET_TEXT_FIELD,\n", + " args=training_args,\n", + " tokenizer=self.tokenizer,\n", + " packing=self.config.PACKING,\n", + " max_seq_length=self.config.MAX_SEQ_LENGTH\n", + " )\n", + " trainer.train()\n", + "\n", + " print(\"\\n====================================================================\\n\")\n", + " print(\"\\t\\t\\tFINETUNING COMPLETED\")\n", + " print(\"\\n====================================================================\\n\")\n", + "\n", + " trainer.push_to_hub()" + ], + "metadata": { + "id": "jpj9gBxula4Z" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "if __name__ == \"__main__\":\n", + " zephyr_trainer = ZephyrTrainer()\n", + " zephyr_trainer.train()" + ], + "metadata": { + "id": "jUhYereLF380", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "outputId": "27271b2d-8c33-4880-d958-ca06d4fe24a1" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.10/dist-packages/pyarrow/pandas_compat.py:373: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.\n", + " if _pandas_api.is_sparse(col):\n", + "Using `disable_exllama` is deprecated and will be removed in version 4.37. Use `use_exllama` instead and specify the version with `exllama_config`.The value of `use_exllama` will be overwritten by `disable_exllama` passed in `GPTQConfig` or stored in your config file.\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n", + "====================================================================\n", + "\n", + "\t\t\tDOWNLOADED DATASET\n", + "\n", + "====================================================================\n", + "\n", + " Article Text \\\n", + "0 Campbell: E-mail row 'silly fuss'\\n\\nEx-No 10 ... \n", + "1 Labour plans maternity pay rise\\n\\nMaternity p... \n", + "2 Howard rebuts asylum criticisms\\n\\nTory leader... \n", + "3 PM apology over jailings\\n\\nTony Blair has apo... \n", + "4 'Errors' doomed first Dome sale\\n\\nThe initial... \n", + "... ... 
\n", + "2219 Veteran Martinez wins Thai title\\n\\nConchita M... \n", + "2220 Mirza makes Indian tennis history\\n\\nTeenager ... \n", + "2221 Soderling wins tense Milan final\\n\\nFifth seed... \n", + "2222 Roddick to face Saulnier in final\\n\\nAndy Rodd... \n", + "2223 Big guns ease through in San Jose\\n\\nTop-seede... \n", + "\n", + " Summary \n", + "0 Mr Campbell messaged Newsnight after the progr... \n", + "1 She said her party would boost maternity pay i... \n", + "2 Former Tory chancellor Ken Clarke says Mr Howa... \n", + "3 However, one of the so-called Guildford Four, ... \n", + "4 The NAO report said that this sale went throug... \n", + "... ... \n", + "2219 I think the whole week was good for me.\"\"I thi... \n", + "2220 Teenager Sania Mirza completed a superb week a... \n", + "2221 Fifth seed Robin Soderling took the Milan Indo... \n", + "2222 Andy Roddick will play Cyril Saulnier in the f... \n", + "2223 Top-seeded Americans Andy Roddick and Andre Ag... \n", + "\n", + "[2224 rows x 2 columns]\n", + "\n", + "====================================================================\n", + "\n", + "\t\t\tPROCESSED DATASET\n", + "\n", + "====================================================================\n", + "\n", + " text\n", + "0 <|system|>\\n You are a AI powered summarizatio...\n", + "1 <|system|>\\n You are a AI powered summarizatio...\n", + "2 <|system|>\\n You are a AI powered summarizatio...\n", + "3 <|system|>\\n You are a AI powered summarizatio...\n", + "4 <|system|>\\n You are a AI powered summarizatio...\n", + "... ...\n", + "2219 <|system|>\\n You are a AI powered summarizatio...\n", + "2220 <|system|>\\n You are a AI powered summarizatio...\n", + "2221 <|system|>\\n You are a AI powered summarizatio...\n", + "2222 <|system|>\\n You are a AI powered summarizatio...\n", + "2223 <|system|>\\n You are a AI powered summarizatio...\n", + "\n", + "[2224 rows x 1 columns]\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "You passed `quantization_config` to `from_pretrained` but the model you're loading already has a `quantization_config` attribute and has already quantized weights. However, loading attributes (e.g. use_exllama, exllama_config, use_cuda_fp16, max_input_length) will be overwritten with the one you passed to `from_pretrained`. 
The rest will be ignored.\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n", + "====================================================================\n", + "\n", + "\t\t\tDOWNLOADED MODEL\n", + "\n", + "====================================================================\n", + "\n", + "\n", + "====================================================================\n", + "\n", + "\t\t\tMODEL CONFIG UPDATED\n", + "\n", + "====================================================================\n", + "\n", + "\n", + "====================================================================\n", + "\n", + "\t\t\tPREPARED MODEL FOR FINETUNING\n", + "\n", + "====================================================================\n", + "\n" + ] + }, + { + "output_type": "error", + "ename": "ValueError", + "evalue": "ignored", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0m__name__\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m\"__main__\"\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0mzephyr_trainer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mZephyrTrainer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0mzephyr_trainer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtrain\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m\u001b[0m in \u001b[0;36mtrain\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 142\u001b[0m \u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcreate_dataset\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 143\u001b[0m \u001b[0mmodel\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpeft_config\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mprepare_model\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 144\u001b[0;31m \u001b[0mtraining_args\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mset_training_arguments\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 145\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 146\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"\\n====================================================================\\n\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m\u001b[0m in \u001b[0;36mset_training_arguments\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 117\u001b[0m '''\n\u001b[1;32m 118\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 119\u001b[0;31m training_arguments = TrainingArguments(\n\u001b[0m\u001b[1;32m 120\u001b[0m \u001b[0moutput_dir\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconfig\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mOUTPUT_DIR\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 121\u001b[0m 
\u001b[0mper_device_train_batch_size\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconfig\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mBATCH_SIZE\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/transformers/training_args.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, output_dir, overwrite_output_dir, do_train, do_eval, do_predict, evaluation_strategy, prediction_loss_only, per_device_train_batch_size, per_device_eval_batch_size, per_gpu_train_batch_size, per_gpu_eval_batch_size, gradient_accumulation_steps, eval_accumulation_steps, eval_delay, learning_rate, weight_decay, adam_beta1, adam_beta2, adam_epsilon, max_grad_norm, num_train_epochs, max_steps, lr_scheduler_type, warmup_ratio, warmup_steps, log_level, log_level_replica, log_on_each_node, logging_dir, logging_strategy, logging_first_step, logging_steps, logging_nan_inf_filter, save_strategy, save_steps, save_total_limit, save_safetensors, save_on_each_node, no_cuda, use_cpu, use_mps_device, seed, data_seed, jit_mode_eval, use_ipex, bf16, fp16, fp16_opt_level, half_precision_backend, bf16_full_eval, fp16_full_eval, tf32, local_rank, ddp_backend, tpu_num_cores, tpu_metrics_debug, debug, dataloader_drop_last, eval_steps, dataloader_num_workers, past_index, run_name, disable_tqdm, remove_unused_columns, label_names, load_best_model_at_end, metric_for_best_model, greater_is_better, ignore_data_skip, fsdp, fsdp_min_num_params, fsdp_config, fsdp_transformer_layer_cls_to_wrap, deepspeed, label_smoothing_factor, optim, optim_args, adafactor, group_by_length, length_column_name, report_to, ddp_find_unused_parameters, ddp_bucket_cap_mb, ddp_broadcast_buffers, dataloader_pin_memo...\n", + "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/transformers/training_args.py\u001b[0m in \u001b[0;36m__post_init__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1446\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfp16\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfp16_full_eval\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1447\u001b[0m ):\n\u001b[0;32m-> 1448\u001b[0;31m raise ValueError(\n\u001b[0m\u001b[1;32m 1449\u001b[0m \u001b[0;34m\"FP16 Mixed precision training with AMP or APEX (`--fp16`) and FP16 half precision evaluation\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1450\u001b[0m \u001b[0;34m\" (`--fp16_full_eval`) can only be used on CUDA or NPU devices or certain XPU devices (with IPEX).\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mValueError\u001b[0m: FP16 Mixed precision training with AMP or APEX (`--fp16`) and FP16 half precision evaluation (`--fp16_full_eval`) can only be used on CUDA or NPU devices or certain XPU devices (with IPEX)." 
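+ ] + } + ] + },
+ { + "cell_type": "markdown", + "source": [ + "Note: the `ValueError` above comes from `fp16=True` in `TrainingArguments`, which needs a CUDA (or NPU/XPU) device, and this run landed on a runtime without one. The next cell is a minimal sketch of a guard, not part of the original run: `use_fp16` is a name introduced here, and the comments assume the `Config`/`ZephyrTrainer` objects defined earlier in this notebook." + ], + "metadata": {} + },
+ { + "cell_type": "code", + "source": [ + "import torch\n", + "\n", + "# Sketch of a device guard (assumes the Config/ZephyrTrainer setup above):\n", + "# request fp16 mixed precision only when a CUDA device is actually present,\n", + "# since TrainingArguments(fp16=True) raises the ValueError shown above on CPU.\n", + "use_fp16 = torch.cuda.is_available()\n", + "\n", + "# In ZephyrTrainer.set_training_arguments, passing\n", + "#     fp16=self.config.FP16 and use_fp16\n", + "# instead of fp16=self.config.FP16 applies the guard." + ], + "metadata": {}, + "execution_count": null, + "outputs": [] + },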
+ { + "cell_type": "markdown", + "source": [ + "# Saving model\n", + "Mount Google Drive, then move the folder into it" + ], + "metadata": { + "id": "nM30_nT9m6I-" + } + },
+ { + "cell_type": "code", + "source": [ + "from google.colab import drive\n", + "drive.mount('/content/drive')" + ], + "metadata": { + "id": "3iF7kkTB_snV" + }, + "execution_count": null, + "outputs": [] + },
+ { + "cell_type": "code", + "source": [ + "import shutil\n", + "\n", + "# Drive must be mounted (previous cell) before moving into MyDrive\n", + "folder_to_move = '/content/sample2'\n", + "destination_folder = '/content/drive/MyDrive/'\n", + "shutil.move(folder_to_move, destination_folder)" + ], + "metadata": { + "id": "FFHkEMLD-_g1" + }, + "execution_count": null, + "outputs": [] + },
+ { + "cell_type": "markdown", + "source": [ + "# Inference\n", + "Loading the model locally\n" + ], + "metadata": { + "id": "d34S03nEqLjU" + } + },
+ { + "cell_type": "code", + "source": [ + "import torch\n", + "from transformers import AutoTokenizer, GenerationConfig\n", + "from peft import AutoPeftModelForCausalLM\n", + "\n", + "def process_data_sample(example):\n", + "\n", + "    processed_example = \"<|system|>\\n You are a document summarizer who will summarize the content without missing any key points, in a concise manner. Truncate the input if it is beyond the length you can handle. Always give a complete sentence which makes sense, and state how many words you can handle.\\n<|user|>\\n\" + example[\"instruction\"] + \"\\n<|assistant|>\\n\"\n", + "\n", + "    return processed_example\n", + "tokenizer = AutoTokenizer.from_pretrained(\"/content/drive/MyDrive/intel hackathon/sample2\")\n", + "sentence='''\n", + "appeal no. lxvi of 1949. appeal from the high court of judicature, bombay, in a reference under section 66 of the indian income tax act, 1022. k.m. munshi (n. p. nathvani, with him), for the appel lant. ' m.c. setalvad, attorney general for india (h. j. umrigar, with him), for the respondent. 1950. may 26. the judgment of the court was delivered by mehr chand mahajan j. this is an appeal against a judgment of the high court of judicature at bombay in an income tax matter and it raises the question whether munici pal property tax and urban immoveable property tax payable under the relevant bombay acts are allowable deductions under section 9 (1) (iv) of the indian income tax act. the assessee company is an investment company deriving its income from properties in the city of bombay. for the assessment year 1940 41 the net income of the assessee under the head \"property\" was computed by the income tax officer in the sum of rs. 6,21,764 after deducting from gross rents certain payments. the company had paid during the relevant year rs. 1,22,675 as municipal property tax and rs. 32,760 as urban property tax. deduction of these two sums was claimed under the provisions of section 9 the act. out of the first item a deduction in the sum of rs. 48,572 was allowed on the ground that this item represented tenants ' burdens paid by the assessee, otherwise the claim was disal lowed. the, appeals of the assessee to the appellate as sistant commissioner and to the income tax appellate tribu nal were unsuccessful. the tribunal, however, agreed to refer two questions of law to the high court of judicature at bombay, namely, (1) whether the municipal taxes paid by the applicant company are an allowable deduction under 555 the provisions of section 9 (1) (iv) of the indian income tax act; (2) whether the urban immoveable property taxes paid by the applicant company are an allowable deduction under section 9 (1) (iv) or under section 9 (1) (v) of the indian income tax act. a supplementary reference was made covering a third question which was not raised before us and it is not there fore necessary to refer to it. the high court answered all the three questions in the negative and hence this appeal. the question for our determination is whether the munic ipal property tax and urban immoveable property tax can be deducted as an allowance under clause (iv) of sub section (1) of section 9 of the act. the decision of the point depends firstly on the construction of the language employed in sub clause (iv) of sub section (1) of section 9 of the act, and secondly, on a finding as to the true nature and character of the liability of the owner under the relevant bombay acts for the payment of these taxes. section 9 along with the relevant clause runs thus: (1) the tax shall be payable by an assessee under the head ' income from property ' in respect of the bona fide annual value of property consisting of any buildings or lands appurtenant thereto of which he is the owner, . . subject to the following allowances, namely : (iv) where the property is subject to a mortgage or other capital charge, the amount of any interest on such mortgage or charge; where the property is subject to an annual charge not being a capital charge, the. amount of such charge; where the property is subject to a ground rent, the amount of such ground rent; and, where the property has been acquired, constructed, repaired, renewed or recon structed with borrowed capital, the amount of any interest payable on such capital; . . . \" it will be seen that clause (iv) consists of four sub clauses corresponding to the four deductions allowed 556 under the clause. before the amending act of 1939, clause (iv) contained only the first, third and fourth sub clauses. under the first sub clause interest is deductible whether the amount borrowed on the security of the property was spent on the property or not\n", + "'''\n", + "inp_str = process_data_sample(\n", + "    {\n", + "        \"instruction\": sentence,\n", + "    }\n", + ")\n", + "\n", + "inputs = tokenizer(inp_str, return_tensors=\"pt\").to(\"cuda\")\n", + "\n", + "model = AutoPeftModelForCausalLM.from_pretrained(\n", + "    \"/content/drive/MyDrive/intel hackathon/sample2\",\n", + "    low_cpu_mem_usage=True,\n", + "    return_dict=True,\n", + "    torch_dtype=torch.float16,\n", + "    device_map=\"cuda\")\n", + "\n", + "generation_config = GenerationConfig(\n", + "    do_sample=True,\n", + "    top_k=1,\n", + "    temperature=0.1,\n", + "    max_new_tokens=256,\n", + "    pad_token_id=tokenizer.eos_token_id\n", + ")" + ], + "metadata": { + "id": "MXD1W06xGSKt" + }, + "execution_count": null, + "outputs": [] + },
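+ { + "cell_type": "markdown", + "source": [ + "The system prompt above asks the model itself to truncate over-long input, which it cannot reliably do. A more dependable option, sketched in the next cell (not part of the original run), is to truncate at tokenization time; the `max_length=2048` budget is an assumption, so check `tokenizer.model_max_length` for the real context window of this checkpoint." + ], + "metadata": {} + },
+ { + "cell_type": "code", + "source": [ + "# Sketch: cap the prompt at a fixed token budget instead of relying on the\n", + "# model to truncate. max_length=2048 is an assumed budget; check\n", + "# tokenizer.model_max_length for the actual limit of this checkpoint.\n", + "inputs = tokenizer(inp_str, return_tensors=\"pt\", truncation=True, max_length=2048).to(\"cuda\")" + ], + "metadata": {}, + "execution_count": null, + "outputs": [] + },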
+ { + "cell_type": "code", + "source": [ + "import time\n", + "st_time = time.time()\n", + "outputs = model.generate(**inputs, generation_config=generation_config)\n", + "print(tokenizer.decode(outputs[0], skip_special_tokens=True))\n", + "print(time.time()-st_time)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "aJ_G1F-LcbgH", + "outputId": "b5418668-0c91-4ac5-deec-5403956deea6" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "<|system|>\n", + " You are a document summarizer who will summarize the content without missing any key points, in a concise manner. Truncate the input if it is beyond the length you can handle. Always give a complete sentence which makes sense, and state how many words you can handle. \n", + "<|user|>\n", + "\n", + "appeal no. lxvi of 1949.
appeal from the high court of judicature, bombay, in a reference under section 66 of the indian income tax act, 1022. k.m. munshi (n. p. nathvani, with him), for the appel lant. ' m.c. setalvad, attorney general for india (h. j. umrigar, with him), for the respondent. 1950. may 26. the judgment of the court was delivered by mehr chand mahajan j. this is an appeal against a judgment of the high court of judicature at bombay in an income tax matter and it raises the question whether munici pal property tax and urban immoveable property tax payable under the relevant bombay acts are allowable deductions under section 9 (1) (iv) of the indian income tax act. the assessee company is an investment company deriving its income from properties in the city of bombay. for the assessment year 1940 41 the net income of the assessee under the head \"property\" was computed by the income tax officer in the sum of rs. 6,21,764 after deducting from gross rents certain payments. the company had paid during the relevant year rs. 1,22,675 as municipal property tax and rs. 32,760 as urban property tax. deduction of these two sums was claimed under the provisions of section 9 the act. out of the first item a deduction in the sum of rs. 48,572 was allowed on the ground that this item represented tenants ' burdens paid by the assessee, otherwise the claim was disal lowed. the, appeals of the assessee to the appellate as sistant commissioner and to the income tax appellate tribu nal were unsuccessful. the tribunal, however, agreed to refer two questions of law to the high court of judicature at bombay, namely, (1) whether the municipal taxes paid by the applicant company are an allowable deduction under 555 the provisions of section 9 (1) (iv) of the indian income tax act; (2) whether the urban immoveable property taxes paid by the applicant company are an allowable deduction under section 9 (1) (iv) or under section 9 (1) (v) of the indian income tax act. a supplementary reference was made covering a third question which was not raised before us and it is not there fore necessary to refer to it. the high court answered all the three questions in the negative and hence this appeal. the question for our determination is whether the munic ipal property tax and urban immoveable property tax can be deducted as an allowance under clause (iv) of sub section (1) of section 9 of the act. the decision of the point depends firstly on the construction of the language employed in sub clause (iv) of sub section (1) of section 9 of the act, and secondly, on a finding as to the true nature and character of the liability of the owner under the relevant bombay acts for the payment of these taxes. section 9 along with the relevant clause runs thus: (1) the tax shall be payable by an assessee under the head ' income from property ' in respect of the bona fide annual value of property consisting of any buildings or lands appurtenant thereto of which he is the owner, . . subject to the following allowances, namely : (iv) where the property is subject to a mortgage or other capital charge, the amount of any interest on such mortgage or charge; where the property is subject to an annual charge not being a capital charge, the. amount of such charge; where the property is subject to a ground rent, the amount of such ground rent; and, where the property has been acquired, constructed, repaired, renewed or recon structed with borrowed capital, the amount of any interest payable on such capital; . . . 
\" it will be seen that clause (iv) consists of four sub clauses corresponding to the four deductions allowed 556 under the clause. before the amending act of 1939, clause (iv) contained only the first, third and fourth sub clauses. under the first sub clause interest is deductible whether the amount borrowed on the security of the property was spent on the property or not\n", + " \n", + "<|assistant|>\n", + "the decision of the point depends firstly on the construction of the language employed in sub clause (iv) of sub section (1) of section 9 of the act, and secondly, on a finding as to the true nature and character of the liability of the owner under the relevant bombay acts for the payment of these taxes.the decision of the high court was delivered by mehr chand mahajan j.the assessee company is an investment company deriving its income from properties in the city of bombay.the high court answered all the three questions in the negative and hence this appeal.the company had paid during the relevant year rs. 1,22,675 as municipal property tax and rs. 32,760 as urban immoveable property tax.the question for our determination is whether the munic ipal property tax and urban immoveable property tax can be deducted as an allowance under clause (iv) of sub section (1) of section 9 of the act.the income tax officer had computed the net income of the assessee under the head \"property\" by deducting from gross rents certain payments.the decision of the high court was delivered by m.\n", + "296.4066882133484\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "!pip freeze ? requirements.txt" + ], + "metadata": { + "id": "sAcSFUk8dC11", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "a8b9046f-9d71-406e-9aa7-05cd703d91b2" + }, + "execution_count": 1, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "absl-py==1.4.0\n", + "aiohttp==3.9.1\n", + "aiosignal==1.3.1\n", + "alabaster==0.7.13\n", + "albumentations==1.3.1\n", + "altair==4.2.2\n", + "anyio==3.7.1\n", + "appdirs==1.4.4\n", + "argon2-cffi==23.1.0\n", + "argon2-cffi-bindings==21.2.0\n", + "array-record==0.5.0\n", + "arviz==0.15.1\n", + "astropy==5.3.4\n", + "astunparse==1.6.3\n", + "async-timeout==4.0.3\n", + "atpublic==4.0\n", + "attrs==23.1.0\n", + "audioread==3.0.1\n", + "autograd==1.6.2\n", + "Babel==2.13.1\n", + "backcall==0.2.0\n", + "beautifulsoup4==4.11.2\n", + "bidict==0.22.1\n", + "bigframes==0.15.0\n", + "bleach==6.1.0\n", + "blinker==1.4\n", + "blis==0.7.11\n", + "blosc2==2.0.0\n", + "bokeh==3.3.1\n", + "bqplot==0.12.42\n", + "branca==0.7.0\n", + "build==1.0.3\n", + "CacheControl==0.13.1\n", + "cachetools==5.3.2\n", + "catalogue==2.0.10\n", + "certifi==2023.11.17\n", + "cffi==1.16.0\n", + "chardet==5.2.0\n", + "charset-normalizer==3.3.2\n", + "chex==0.1.7\n", + "click==8.1.7\n", + "click-plugins==1.1.1\n", + "cligj==0.7.2\n", + "cloudpickle==2.2.1\n", + "cmake==3.27.7\n", + "cmdstanpy==1.2.0\n", + "colorcet==3.0.1\n", + "colorlover==0.3.0\n", + "colour==0.1.5\n", + "community==1.0.0b1\n", + "confection==0.1.4\n", + "cons==0.4.6\n", + "contextlib2==21.6.0\n", + "contourpy==1.2.0\n", + "cryptography==41.0.7\n", + "cufflinks==0.17.3\n", + "cupy-cuda11x==11.0.0\n", + "cvxopt==1.3.2\n", + "cvxpy==1.3.2\n", + "cycler==0.12.1\n", + "cymem==2.0.8\n", + "Cython==3.0.6\n", + "dask==2023.8.1\n", + "datascience==0.17.6\n", + "db-dtypes==1.1.1\n", + "dbus-python==1.2.18\n", + "debugpy==1.6.6\n", + "decorator==4.4.2\n", + "defusedxml==0.7.1\n", + "diskcache==5.6.3\n", + 
"distributed==2023.8.1\n", + "distro==1.7.0\n", + "dlib==19.24.2\n", + "dm-tree==0.1.8\n", + "docutils==0.18.1\n", + "dopamine-rl==4.0.6\n", + "duckdb==0.9.2\n", + "earthengine-api==0.1.381\n", + "easydict==1.11\n", + "ecos==2.0.12\n", + "editdistance==0.6.2\n", + "eerepr==0.0.4\n", + "en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.6.0/en_core_web_sm-3.6.0-py3-none-any.whl#sha256=83276fc78a70045627144786b52e1f2728ad5e29e5e43916ec37ea9c26a11212\n", + "entrypoints==0.4\n", + "et-xmlfile==1.1.0\n", + "etils==1.5.2\n", + "etuples==0.3.9\n", + "exceptiongroup==1.2.0\n", + "fastai==2.7.13\n", + "fastcore==1.5.29\n", + "fastdownload==0.0.7\n", + "fastjsonschema==2.19.0\n", + "fastprogress==1.0.3\n", + "fastrlock==0.8.2\n", + "filelock==3.13.1\n", + "fiona==1.9.5\n", + "firebase-admin==5.3.0\n", + "Flask==2.2.5\n", + "flatbuffers==23.5.26\n", + "flax==0.7.5\n", + "folium==0.14.0\n", + "fonttools==4.45.1\n", + "frozendict==2.3.10\n", + "frozenlist==1.4.0\n", + "fsspec==2023.6.0\n", + "future==0.18.3\n", + "gast==0.5.4\n", + "gcsfs==2023.6.0\n", + "GDAL==3.4.3\n", + "gdown==4.6.6\n", + "geemap==0.28.2\n", + "gensim==4.3.2\n", + "geocoder==1.38.1\n", + "geographiclib==2.0\n", + "geopandas==0.13.2\n", + "geopy==2.3.0\n", + "gin-config==0.5.0\n", + "glob2==0.7\n", + "google==2.0.3\n", + "google-ai-generativelanguage==0.3.3\n", + "google-api-core==2.11.1\n", + "google-api-python-client==2.84.0\n", + "google-auth==2.17.3\n", + "google-auth-httplib2==0.1.1\n", + "google-auth-oauthlib==1.0.0\n", + "google-cloud-aiplatform==1.36.4\n", + "google-cloud-bigquery==3.12.0\n", + "google-cloud-bigquery-connection==1.12.1\n", + "google-cloud-bigquery-storage==2.23.0\n", + "google-cloud-core==2.3.3\n", + "google-cloud-datastore==2.15.2\n", + "google-cloud-firestore==2.11.1\n", + "google-cloud-functions==1.13.3\n", + "google-cloud-iam==2.12.2\n", + "google-cloud-language==2.9.1\n", + "google-cloud-resource-manager==1.10.4\n", + "google-cloud-storage==2.8.0\n", + "google-cloud-translate==3.11.3\n", + "google-colab @ file:///colabtools/dist/google-colab-1.0.0.tar.gz#sha256=1e49d2e09c34a7c1be521d9f01d40cea4094d4fa6fe403c41c5ff0b6e3a86211\n", + "google-crc32c==1.5.0\n", + "google-generativeai==0.2.2\n", + "google-pasta==0.2.0\n", + "google-resumable-media==2.6.0\n", + "googleapis-common-protos==1.61.0\n", + "googledrivedownloader==0.4\n", + "graphviz==0.20.1\n", + "greenlet==3.0.1\n", + "grpc-google-iam-v1==0.12.7\n", + "grpcio==1.59.3\n", + "grpcio-status==1.48.2\n", + "gspread==3.4.2\n", + "gspread-dataframe==3.3.1\n", + "gym==0.25.2\n", + "gym-notices==0.0.8\n", + "h5netcdf==1.3.0\n", + "h5py==3.9.0\n", + "holidays==0.37\n", + "holoviews==1.17.1\n", + "html5lib==1.1\n", + "httpimport==1.3.1\n", + "httplib2==0.22.0\n", + "huggingface-hub==0.19.4\n", + "humanize==4.7.0\n", + "hyperopt==0.2.7\n", + "ibis-framework==6.2.0\n", + "idna==3.6\n", + "imageio==2.31.6\n", + "imageio-ffmpeg==0.4.9\n", + "imagesize==1.4.1\n", + "imbalanced-learn==0.10.1\n", + "imgaug==0.4.0\n", + "importlib-metadata==6.8.0\n", + "importlib-resources==6.1.1\n", + "imutils==0.5.4\n", + "inflect==7.0.0\n", + "iniconfig==2.0.0\n", + "install==1.3.5\n", + "intel-openmp==2023.2.0\n", + "ipyevents==2.0.2\n", + "ipyfilechooser==0.6.0\n", + "ipykernel==5.5.6\n", + "ipyleaflet==0.18.0\n", + "ipython==7.34.0\n", + "ipython-genutils==0.2.0\n", + "ipython-sql==0.5.0\n", + "ipytree==0.2.2\n", + "ipywidgets==7.7.1\n", + "itsdangerous==2.1.2\n", + "jax==0.4.20\n", + "jaxlib @ 
https://storage.googleapis.com/jax-releases/cuda11/jaxlib-0.4.20+cuda11.cudnn86-cp310-cp310-manylinux2014_x86_64.whl#sha256=01be66238133f884bf5adf15cd7eaaf8445f9d4b056c5c64df28a997a6aff2fe\n", + "jeepney==0.7.1\n", + "jieba==0.42.1\n", + "Jinja2==3.1.2\n", + "joblib==1.3.2\n", + "jsonpickle==3.0.2\n", + "jsonschema==4.19.2\n", + "jsonschema-specifications==2023.11.2\n", + "jupyter-client==6.1.12\n", + "jupyter-console==6.1.0\n", + "jupyter-server==1.24.0\n", + "jupyter_core==5.5.0\n", + "jupyterlab-widgets==3.0.9\n", + "jupyterlab_pygments==0.3.0\n", + "kaggle==1.5.16\n", + "keras==2.14.0\n", + "keyring==23.5.0\n", + "kiwisolver==1.4.5\n", + "langcodes==3.3.0\n", + "launchpadlib==1.10.16\n", + "lazr.restfulclient==0.14.4\n", + "lazr.uri==1.0.6\n", + "lazy_loader==0.3\n", + "libclang==16.0.6\n", + "librosa==0.10.1\n", + "lida==0.0.10\n", + "lightgbm==4.1.0\n", + "linkify-it-py==2.0.2\n", + "llmx==0.0.15a0\n", + "llvmlite==0.41.1\n", + "locket==1.0.0\n", + "logical-unification==0.4.6\n", + "lxml==4.9.3\n", + "malloy==2023.1064\n", + "Markdown==3.5.1\n", + "markdown-it-py==3.0.0\n", + "MarkupSafe==2.1.3\n", + "matplotlib==3.7.1\n", + "matplotlib-inline==0.1.6\n", + "matplotlib-venn==0.11.9\n", + "mdit-py-plugins==0.4.0\n", + "mdurl==0.1.2\n", + "miniKanren==1.0.3\n", + "missingno==0.5.2\n", + "mistune==0.8.4\n", + "mizani==0.9.3\n", + "mkl==2023.2.0\n", + "ml-dtypes==0.2.0\n", + "mlxtend==0.22.0\n", + "more-itertools==10.1.0\n", + "moviepy==1.0.3\n", + "mpmath==1.3.0\n", + "msgpack==1.0.7\n", + "multidict==6.0.4\n", + "multipledispatch==1.0.0\n", + "multitasking==0.0.11\n", + "murmurhash==1.0.10\n", + "music21==9.1.0\n", + "natsort==8.4.0\n", + "nbclassic==1.0.0\n", + "nbclient==0.9.0\n", + "nbconvert==6.5.4\n", + "nbformat==5.9.2\n", + "nest-asyncio==1.5.8\n", + "networkx==3.2.1\n", + "nibabel==4.0.2\n", + "nltk==3.8.1\n", + "notebook==6.5.5\n", + "notebook_shim==0.2.3\n", + "numba==0.58.1\n", + "numexpr==2.8.7\n", + "numpy==1.23.5\n", + "oauth2client==4.1.3\n", + "oauthlib==3.2.2\n", + "opencv-contrib-python==4.8.0.76\n", + "opencv-python==4.8.0.76\n", + "opencv-python-headless==4.8.1.78\n", + "openpyxl==3.1.2\n", + "opt-einsum==3.3.0\n", + "optax==0.1.7\n", + "orbax-checkpoint==0.4.3\n", + "osqp==0.6.2.post8\n", + "packaging==23.2\n", + "pandas==1.5.3\n", + "pandas-datareader==0.10.0\n", + "pandas-gbq==0.17.9\n", + "pandas-stubs==1.5.3.230304\n", + "pandocfilters==1.5.0\n", + "panel==1.3.4\n", + "param==2.0.1\n", + "parso==0.8.3\n", + "parsy==2.1\n", + "partd==1.4.1\n", + "pathlib==1.0.1\n", + "pathy==0.10.3\n", + "patsy==0.5.3\n", + "peewee==3.17.0\n", + "pexpect==4.9.0\n", + "pickleshare==0.7.5\n", + "Pillow==9.4.0\n", + "pip-tools==6.13.0\n", + "platformdirs==4.0.0\n", + "plotly==5.15.0\n", + "plotnine==0.12.4\n", + "pluggy==1.3.0\n", + "polars==0.17.3\n", + "pooch==1.8.0\n", + "portpicker==1.5.2\n", + "prefetch-generator==1.0.3\n", + "preshed==3.0.9\n", + "prettytable==3.9.0\n", + "proglog==0.1.10\n", + "progressbar2==4.2.0\n", + "prometheus-client==0.19.0\n", + "promise==2.3\n", + "prompt-toolkit==3.0.41\n", + "prophet==1.1.5\n", + "proto-plus==1.22.3\n", + "protobuf==3.20.3\n", + "psutil==5.9.5\n", + "psycopg2==2.9.9\n", + "ptyprocess==0.7.0\n", + "py-cpuinfo==9.0.0\n", + "py4j==0.10.9.7\n", + "pyarrow==9.0.0\n", + "pyasn1==0.5.1\n", + "pyasn1-modules==0.3.0\n", + "pycocotools==2.0.7\n", + "pycparser==2.21\n", + "pyct==0.5.0\n", + "pydantic==1.10.13\n", + "pydata-google-auth==1.8.2\n", + "pydot==1.4.2\n", + "pydot-ng==2.0.0\n", + "pydotplus==2.0.2\n", + "PyDrive==1.3.1\n", + 
"PyDrive2==1.6.3\n", + "pyerfa==2.0.1.1\n", + "pygame==2.5.2\n", + "Pygments==2.16.1\n", + "PyGObject==3.42.1\n", + "PyJWT==2.3.0\n", + "pymc==5.7.2\n", + "pymystem3==0.2.0\n", + "PyOpenGL==3.1.7\n", + "pyOpenSSL==23.3.0\n", + "pyparsing==3.1.1\n", + "pyperclip==1.8.2\n", + "pyproj==3.6.1\n", + "pyproject_hooks==1.0.0\n", + "pyshp==2.3.1\n", + "PySocks==1.7.1\n", + "pytensor==2.14.2\n", + "pytest==7.4.3\n", + "python-apt==0.0.0\n", + "python-box==7.1.1\n", + "python-dateutil==2.8.2\n", + "python-louvain==0.16\n", + "python-slugify==8.0.1\n", + "python-utils==3.8.1\n", + "pytz==2023.3.post1\n", + "pyviz_comms==3.0.0\n", + "PyWavelets==1.5.0\n", + "PyYAML==6.0.1\n", + "pyzmq==23.2.1\n", + "qdldl==0.1.7.post0\n", + "qudida==0.0.4\n", + "ratelim==0.1.6\n", + "referencing==0.31.1\n", + "regex==2023.6.3\n", + "requests==2.31.0\n", + "requests-oauthlib==1.3.1\n", + "requirements-parser==0.5.0\n", + "rich==13.7.0\n", + "rpds-py==0.13.2\n", + "rpy2==3.4.2\n", + "rsa==4.9\n", + "safetensors==0.4.1\n", + "scikit-image==0.19.3\n", + "scikit-learn==1.2.2\n", + "scipy==1.11.4\n", + "scooby==0.9.2\n", + "scs==3.2.4.post1\n", + "seaborn==0.12.2\n", + "SecretStorage==3.3.1\n", + "Send2Trash==1.8.2\n", + "shapely==2.0.2\n", + "six==1.16.0\n", + "sklearn-pandas==2.2.0\n", + "smart-open==6.4.0\n", + "sniffio==1.3.0\n", + "snowballstemmer==2.2.0\n", + "sortedcontainers==2.4.0\n", + "soundfile==0.12.1\n", + "soupsieve==2.5\n", + "soxr==0.3.7\n", + "spacy==3.6.1\n", + "spacy-legacy==3.0.12\n", + "spacy-loggers==1.0.5\n", + "Sphinx==5.0.2\n", + "sphinxcontrib-applehelp==1.0.7\n", + "sphinxcontrib-devhelp==1.0.5\n", + "sphinxcontrib-htmlhelp==2.0.4\n", + "sphinxcontrib-jsmath==1.0.1\n", + "sphinxcontrib-qthelp==1.0.6\n", + "sphinxcontrib-serializinghtml==1.1.9\n", + "SQLAlchemy==2.0.23\n", + "sqlglot==17.16.2\n", + "sqlparse==0.4.4\n", + "srsly==2.4.8\n", + "stanio==0.3.0\n", + "statsmodels==0.14.0\n", + "sympy==1.12\n", + "tables==3.8.0\n", + "tabulate==0.9.0\n", + "tbb==2021.11.0\n", + "tblib==3.0.0\n", + "tenacity==8.2.3\n", + "tensorboard==2.14.1\n", + "tensorboard-data-server==0.7.2\n", + "tensorflow==2.14.0\n", + "tensorflow-datasets==4.9.3\n", + "tensorflow-estimator==2.14.0\n", + "tensorflow-gcs-config==2.14.0\n", + "tensorflow-hub==0.15.0\n", + "tensorflow-io-gcs-filesystem==0.34.0\n", + "tensorflow-metadata==1.14.0\n", + "tensorflow-probability==0.22.0\n", + "tensorstore==0.1.45\n", + "termcolor==2.3.0\n", + "terminado==0.18.0\n", + "text-unidecode==1.3\n", + "textblob==0.17.1\n", + "tf-slim==1.1.0\n", + "thinc==8.1.12\n", + "threadpoolctl==3.2.0\n", + "tifffile==2023.9.26\n", + "tinycss2==1.2.1\n", + "tokenizers==0.15.0\n", + "toml==0.10.2\n", + "tomli==2.0.1\n", + "toolz==0.12.0\n", + "torch @ https://download.pytorch.org/whl/cu118/torch-2.1.0%2Bcu118-cp310-cp310-linux_x86_64.whl#sha256=a81b554184492005543ddc32e96469f9369d778dedd195d73bda9bed407d6589\n", + "torchaudio @ https://download.pytorch.org/whl/cu118/torchaudio-2.1.0%2Bcu118-cp310-cp310-linux_x86_64.whl#sha256=cdfd0a129406155eee595f408cafbb92589652da4090d1d2040f5453d4cae71f\n", + "torchdata==0.7.0\n", + "torchsummary==1.5.1\n", + "torchtext==0.16.0\n", + "torchvision @ https://download.pytorch.org/whl/cu118/torchvision-0.16.0%2Bcu118-cp310-cp310-linux_x86_64.whl#sha256=033712f65d45afe806676c4129dfe601ad1321d9e092df62b15847c02d4061dc\n", + "tornado==6.3.2\n", + "tqdm==4.66.1\n", + "traitlets==5.7.1\n", + "traittypes==0.2.1\n", + "transformers==4.35.2\n", + "triton==2.1.0\n", + "tweepy==4.14.0\n", + "typer==0.9.0\n", + "types-pytz==2023.3.1.1\n", 
+ "types-setuptools==69.0.0.0\n", + "typing_extensions==4.5.0\n", + "tzlocal==5.2\n", + "uc-micro-py==1.0.2\n", + "uritemplate==4.1.1\n", + "urllib3==2.0.7\n", + "vega-datasets==0.9.0\n", + "wadllib==1.3.6\n", + "wasabi==1.1.2\n", + "wcwidth==0.2.12\n", + "webcolors==1.13\n", + "webencodings==0.5.1\n", + "websocket-client==1.6.4\n", + "Werkzeug==3.0.1\n", + "widgetsnbextension==3.6.6\n", + "wordcloud==1.9.2\n", + "wrapt==1.14.1\n", + "xarray==2023.7.0\n", + "xarray-einstats==0.6.0\n", + "xgboost==2.0.2\n", + "xlrd==2.0.1\n", + "xxhash==3.4.1\n", + "xyzservices==2023.10.1\n", + "yarl==1.9.3\n", + "yellowbrick==1.5\n", + "yfinance==0.2.32\n", + "zict==3.0.0\n", + "zipp==3.17.0\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "R19WQMGeMr8_" + }, + "execution_count": null, + "outputs": [] + } + ] +} \ No newline at end of file diff --git a/architects_of_future/FineTuned/sumarization_t5model.ipynb b/architects_of_future/FineTuned/sumarization_t5model.ipynb new file mode 100644 index 00000000..36e54ae2 --- /dev/null +++ b/architects_of_future/FineTuned/sumarization_t5model.ipynb @@ -0,0 +1,1252 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "markdown", + "source": [ + "!pip uninstall torch torchvision -y\n", + "!pip install torch==1.8.1 torchvision==0.9.1 torchtext==0.9.0\n" + ], + "metadata": { + "id": "onRGUEPp4IDw" + } + }, + { + "cell_type": "code", + "source": [ + "from google.colab import drive\n", + "drive.mount('/content/drive')" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "NSjRnqJR5CVQ", + "outputId": "d10001c0-6e1b-4b54-f410-705af9d5d7a7" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount(\"/content/drive\", force_remount=True).\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "!pip install accelerate\n", + "\n", + "!pip install transformers -U" + ], + "metadata": { + "id": "mNlvmIUn4M7k", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "42ce1911-c73c-4b50-a94d-4a93844dc37b" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Requirement already satisfied: accelerate in /usr/local/lib/python3.10/dist-packages (0.25.0)\n", + "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from accelerate) (1.23.5)\n", + "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from accelerate) (23.2)\n", + "Requirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from accelerate) (5.9.5)\n", + "Requirement already satisfied: pyyaml in /usr/local/lib/python3.10/dist-packages (from accelerate) (6.0.1)\n", + "Requirement already satisfied: torch>=1.10.0 in /usr/local/lib/python3.10/dist-packages (from accelerate) (2.1.0+cu118)\n", + "Requirement already satisfied: huggingface-hub in /usr/local/lib/python3.10/dist-packages (from accelerate) (0.19.4)\n", + "Requirement already satisfied: safetensors>=0.3.1 in /usr/local/lib/python3.10/dist-packages (from accelerate) (0.4.1)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from 
torch>=1.10.0->accelerate) (3.13.1)\n", + "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate) (4.5.0)\n", + "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate) (1.12)\n", + "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate) (3.2.1)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate) (3.1.2)\n", + "Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate) (2023.6.0)\n", + "Requirement already satisfied: triton==2.1.0 in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate) (2.1.0)\n", + "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from huggingface-hub->accelerate) (2.31.0)\n", + "Requirement already satisfied: tqdm>=4.42.1 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub->accelerate) (4.66.1)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch>=1.10.0->accelerate) (2.1.3)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub->accelerate) (3.3.2)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub->accelerate) (3.6)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub->accelerate) (2.0.7)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub->accelerate) (2023.11.17)\n", + "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch>=1.10.0->accelerate) (1.3.0)\n", + "Requirement already satisfied: transformers in /usr/local/lib/python3.10/dist-packages (4.35.2)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from transformers) (3.13.1)\n", + "Requirement already satisfied: huggingface-hub<1.0,>=0.16.4 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.19.4)\n", + "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from transformers) (1.23.5)\n", + "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from transformers) (23.2)\n", + "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (6.0.1)\n", + "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers) (2023.6.3)\n", + "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from transformers) (2.31.0)\n", + "Requirement already satisfied: tokenizers<0.19,>=0.14 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.15.0)\n", + "Requirement already satisfied: safetensors>=0.3.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.4.1)\n", + "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.10/dist-packages (from transformers) (4.66.1)\n", + "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.16.4->transformers) (2023.6.0)\n", + "Requirement already 
satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.16.4->transformers) (4.5.0)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (3.3.2)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (3.6)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (2.0.7)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (2023.11.17)\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "!pip install modin" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "fyI5Vkxvh1M0", + "outputId": "da7bb0da-7b86-4fa6-8f09-dae521ff76e5" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Requirement already satisfied: modin in /usr/local/lib/python3.10/dist-packages (0.25.1)\n", + "Requirement already satisfied: pandas<2.2,>=2.1 in /usr/local/lib/python3.10/dist-packages (from modin) (2.1.3)\n", + "Requirement already satisfied: packaging>=21.0 in /usr/local/lib/python3.10/dist-packages (from modin) (23.2)\n", + "Requirement already satisfied: numpy>=1.22.4 in /usr/local/lib/python3.10/dist-packages (from modin) (1.23.5)\n", + "Requirement already satisfied: fsspec>=2022.05.0 in /usr/local/lib/python3.10/dist-packages (from modin) (2023.6.0)\n", + "Requirement already satisfied: psutil>=5.8.0 in /usr/local/lib/python3.10/dist-packages (from modin) (5.9.5)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.10/dist-packages (from pandas<2.2,>=2.1->modin) (2.8.2)\n", + "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas<2.2,>=2.1->modin) (2023.3.post1)\n", + "Requirement already satisfied: tzdata>=2022.1 in /usr/local/lib/python3.10/dist-packages (from pandas<2.2,>=2.1->modin) (2023.3)\n", + "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.2->pandas<2.2,>=2.1->modin) (1.16.0)\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "import matplotlib.pyplot as plt\n", + "\n", + "# Assuming you have two time variables, let's call them time1 and time2\n", + "time1 = 98.04223680496216 # Replace this with your actual time variable\n", + "time2 = 124.3553466796875 # Replace this with your actual time variable\n", + "\n", + "# Visual representation\n", + "labels = ['With Intel extension', 'Without Intel extension']\n", + "times = [time1, time2]\n", + "\n", + "plt.plot(labels, times, marker='o', linestyle='-', color='b')\n", + "plt.ylabel('Time (seconds)')\n", + "plt.title('Inference Time of Hugging Face Model')\n", + "\n", + "# Add annotations for the actual time values\n", + "for label, time_value in zip(labels, times):\n", + " plt.annotate(f'{time_value:.2f} seconds', (label, time_value), textcoords=\"offset points\", xytext=(0,5), ha='center')\n", + "\n", + "plt.show()" + ], + "metadata": { + "id": "qJJJdsi_w9V5", + "outputId": "a54d63b6-1a09-48fa-d351-90eb043a872a", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 452 + } + }, + "execution_count": null, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAnUAAAGzCAYAAABJruFgAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAABtcElEQVR4nO3de3zO9f/H8cc1dnLYmLAtzCmnnMn5GKGk5HyeM5VDzungUH0jpRAR9UVSIYeUIqchtJyGhNCcT2Vs5jCzvX9/fH67vl22MTO7dnjeb7fr1t6fz+f6XK/PZ1f7vLyPNmOMQURERETSNRdnByAiIiIiD05JnYiIiEgGoKROREREJANQUiciIiKSASipExEREckAlNSJiIiIZABK6kREREQyACV1IiIiIhmAkjoRERGRDEBJnaQrkZGR9O7dG19fX2w2G6+88oqzQ0rzgoKCsNlsBAUFOTuUZHn//fcpWrQoWbJkoWLFis4OJ8V0796dwoULOzsMScSD/H4aNGhAgwYNUjQekaRQUiepat68edhsNnbu3Jms97/77rvMmzePF198kQULFtC1a9cUjjB96N69Ozab7Z6v7t27OzvUB/Lzzz8zcuRIateuzdy5c3n33XcTPbZ79+7kyJEj0f02m40BAwY8jDDTnQYNGiT6nTl06JCzw3MQF1fv3r0T3P/666/bj/nnn39SOTqRtCWrswMQuR8bNmygRo0ajB071tmhOFW/fv1o3LixvRwaGsqYMWPo27cvdevWtW8vVqwY1atX58aNG7i5uTkj1AeyYcMGXFxc+Pzzz9Nl/HczZ84cYmNjnfb5BQoUYMKECfG2+/v7OyGau/Pw8GDp0qV88skn8b4HX3/9NR4eHty8edNJ0YmkHUrqJF25ePEiZcqUSbHzxcbGcuvWLTw8PFLsnKmhZs2a1KxZ017euXMnY8aMoWbNmnTp0iXe8ent+uJcvHgRT0/PDJfQAbi6ujr18729vRP8rqRFzZo1Y+XKlfz00088//zz9u3btm0jNDSU1q1bs3TpUidGKJI2qPlVnC6u2ezMmTO0bNmSHDlykDdvXoYPH05MTAzwv35hoaGhrFq1yt7ccvz4cQCioqIYO3YsxYsXx93dnYIFCzJy5EiioqIcPiuuCW7hwoU8/vjjuLu7s3r1agDOnDlDz549yZ8/P+7u7jz++OP897//dXh/XByLFy/mP//5DwUKFMDDw4NGjRpx9OjReNcWHBzMM888Q+7cucmePTvly5dn6tSpDsccOnSINm3a4OPjg4eHB1WrVmXlypUpdXsT7FPXoEEDypYty759+6hfvz7ZsmWjePHifPvttwBs2rSJ6tWr4+npScmSJVm3bl288yblfiXm9u3bvP322xQrVgx3d3cKFy7Ma6+95vD7stlszJ07l2vXrtl/3/PmzXuge/FvcV0B4r5DcRLrgzhjxgyKFi2Kp6cn1apVY8uWLQn2nTpx4gTPPfcc2bNnJ1++fAwZMoQ1a9bEO+edfbaOHz+OzWbjgw8+YPbs2fZ788QTT7Bjx4548S9ZsoQyZcrg4eFB2bJlWb58eYr10/vuu+9o3rw5/v7+uLu7U6xYMd5++237/4//lhrf8UcffZR69erx1VdfOWxfuHAh5cqVo2zZsgm+b8mSJVSpUgVPT08eeeQRunTpwpkzZ+Idt2LFCsqWLetwLxMSGxvLlClTePzxx/Hw8CB//vz069ePy5cvJ/laRB4m1dRJmhATE0PTpk2pXr06H3zwAevWrWPy5MkUK1aMF198kdKlS7NgwQKGDBlCgQIFGDZsGAB58+YlNjaW5557jl9++YW+fftSunRp9u/fz0cffcSff/7JihUrHD5rw4YNLF68mAEDBvDII49QuHBhLly4QI0aNexJX968efnpp5/o1asXERER8QZkTJw4ERcXF4YPH054eDiTJk2ic+fOBAcH249Zu3Ytzz77LH5+fgwePBhfX18OHjzIDz/8wODBgwE4cOAAtWvX5tFHH+XVV18le/bsLF68mJYtW7J06VJeeOGFh3bPL1++zLPPPkuHDh1o27YtM2fOpEOHDixcuJBXXnmF/v3706lTJ95//33atGnDqVOnyJkzJ8B936879e7dm/nz59OmTRuGDRtGcHAwEyZM4ODBg/YH6oIFC5g9eza//fYbn332GQC1atW653U9jH5VM2fOZMCAAdStW5chQ4Zw/PhxWrZsSe7cuSlQoID9uGvXrvHkk09y7tw5++/8q6++YuPGjUn+rK+++oqrV6/Sr18/bDYbkyZNolWrVvz111/22r1Vq1bRvn17ypUrx4QJE7h8+TK9evXi0UcfTfLnxMTExLtXHh4e5MiRg3nz5pEjRw6GDh1Kjhw52LBhA2PGjCEiIoL333/ffnxqfsc7derE4MGDiYyMJEeOHNy+fZslS5YwdOjQBJte582bR48ePXjiiSeYMGECFy5cYOrUqWzdupU9e/aQK1cuwOq32bp1a8qUKcOECRO4dOkSPXr0cPi9xunXr5/9vIMGDSI0NJTp06ezZ88etm7d6vTaVxGMSCqaO3euAcyOHTvs2wIDAw1g3nrrLYdjK1WqZKpUqeKwLSAgwDRv3txh24IFC4yLi4vZsmWLw/ZZs2YZwGzdutW+DTAuLi7mwIEDDsf26tXL+Pn5mX/++cdhe4cOHYy3t7e5fv26McaYjRs3GsCULl3aREVF2Y+bOnWqAcz+/fuNMcbcvn3bFClSxAQEBJjLly87nDM2Ntb+c6NGjUy5cuXMzZs3HfbXqlXLPPbYYyapduzYYQAzd+7cePviYt64caN9W/369Q1gvvrqK/u2Q4cO2e/Pr7/+at++Zs2aeOdO6v1KSEhIiAFM7969HbYPHz7cAGbDhg32bYGBgSZ79uz3unz7scBdXy+//LL9+LjvYmhoqMN57rxfUVFRJk+ePOaJJ54w0dHR9uPmzZtnAFO/fn37tsmTJxvArFixwr7txo0bplSpUvF+B4GBgSYgIMBeDg0NNYDJkyePCQsLs2//7rvvDGC+//57+7Zy5cqZAgUKmKtXr9q3BQUFGcDhnImJ+/3f+QoMDDTGmAR/f/369TPZsmWzf1dT6zse93sLCwszbm5uZsGCBcYYY1atWmVsNps5fvy4GTt2rAHM33//bYwx5tatWyZfvnymbNmy5saNG/Zz/fDDDwYwY8aMsW+rWLGi8fPzM1euXLFv+/nnn+Pdyy1bthjALFy40CG+1atXx9tev359h++FSGpR86ukGf3793co161bl7/++uue71uyZAmlS5emVKlS/PPPP/bXk08+CRCvlqR+/foO/fKMMSxdupQWLVpgjHE4R9OmTQkPD2f37t0O5+jRo4dDP6+4wQlx8e7Zs4fQ0FBeeeUVe41AHJvNBkBYWBgbNmygXbt2XL161f6Zly5domnTphw5ciTBpqKUkiNHDjp06GAvlyxZkly5clG6dGmqV69u3x73c9y1Jed+/duPP/4IwNChQx22x9W+rlq1KtnX5OHhwdq1axN8JdfO
nTu5dOkSffr0IWvW/zVudO7cmdy5czscu3r1ah599FGee+45h5j69OmT5M9r3769w3nv/G6dPXuW/fv3061bN4fRvvXr16dcuXJJ/pzChQvHu0cjR44EwNPT035c3Hezbt26XL9+3T46NrW/47lz56ZZs2Z8/fXXgFWjWatWLQICAuIdu3PnTi5evMhLL73k0J+0efPmlCpVyv4dO3fuHCEhIQQGBuLt7W0/7qmnnorXd3fJkiV4e3vz1FNPOXznq1SpQo4cOe6rNlbkYVHzq6QJHh4e5M2b12Fb7ty5k9RX5ciRIxw8eDDe++NcvHjRoVykSBGH8t9//82VK1eYPXs2s2fPTtI5ChUqFC9WwB7vsWPHABLt6wNw9OhRjDG8+eabvPnmm4l+7v00qd2PAgUK2B++cby9vSlYsGC8bfC/a0vO/fq3EydO4OLiQvHixR22+/r6kitXLk6cOHHf1xInS5YsDqOCU0JcPHfGmzVr1nj9106cOEGxYsXi3dc733s39/puJRZP3La7JdT/lj179kTv1YEDB3jjjTfYsGEDERERDvvCw8MB53zHO3XqRNeuXTl58iQrVqxg0qRJCR4Xd49KliwZb1+pUqX45ZdfHI577LHH4h1XsmRJh3t55MgRwsPDyZcvX6LXIeJsSuokTciSJUuy3xsbG0u5cuX48MMPE9x/Z5Ly71qIuPcDdOnShcDAwATPUb58eYdyYvEaY5IU878/d/jw4TRt2jTBY+4nGbhfiV3Dva4tOfcrIXcmPqktsc9PaDBAakqJ79aDuHLlCvXr18fLy4u33nqLYsWK4eHhwe7duxk1atR9TcOS0t/x5557Dnd3dwIDA4mKiqJdu3ZJfu+Dio2NJV++fCxcuDDB/Yn9o1IkNSmpk3SvWLFi7N27l0aNGiUrUcibNy85c+YkJiYmxWp5ihUrBsDvv/+e6DmLFi0KWFNbpHTt0sP0oPcrICCA2NhYjhw5QunSpe3bL1y4wJUrVxJsTnsY4mrArly54rD9zprCuHiOHj1Kw4YN7dtv377N8ePHHRLYgIAA/vjjD4wxDt/FhEZGJ9e/47lTSnxOUFAQly5dYtmyZdSrV8++PTQ01OE4Z3zHPT09admyJV9++SVPP/00jzzySILHxd2jw4cP27thxDl8+LB9f9x/jxw5Eu8chw8fdigXK1aMdevWUbt27Xj/MBRJK9SnTtK9du3acebMGebMmRNv340bN7h27dpd358lSxb7PFe///57vP1///33fcdUuXJlihQpwpQpU+IlDXE1Lvny5aNBgwZ8+umnnDt3LkU+NzU86P165plnAJgyZYrD9ria1ubNm6dMoPcQl5Rs3rzZvi0mJiZek3LVqlXJkycPc+bM4fbt2/btCxcujNc9oGnTppw5c8Zhuo6bN28m+N1MLn9/f8qWLcsXX3xBZGSkffumTZvYv3//A58/rqbw3zWDt27d4pNPPnE4zlnf8eHDhzN27NhEm3PB+p3ly5ePWbNmOUyT89NPP3Hw4EH7d8zPz4+KFSsyf/58e7MyWKN6//jjD4dztmvXjpiYGN5+++14n3f79u1490DEGVRTJ+le165dWbx4Mf3792fjxo3Url2bmJgYDh06xOLFi1mzZg1Vq1a96zkmTpzIxo0bqV69On369KFMmTKEhYWxe/du1q1bR1hY2H3F5OLiwsyZM2nRogUVK1akR48e+Pn5cejQIQ4cOMCaNWsAa+6zOnXqUK5cOfr06UPRokW5cOEC27dv5/Tp0+zduzfZ9+VhepD7VaFCBQIDA5k9e7a9qe+3335j/vz5tGzZ0qE27GF6/PHHqVGjBqNHjyYsLAwfHx+++eYbh8QNwM3NjXHjxjFw4ECefPJJ2rVrx/Hjx5k3b168/nP9+vVj+vTpdOzYkcGDB+Pn58fChQvtnfVTqsn53Xff5fnnn6d27dr06NGDy5cvM336dMqWLeuQ6CVHrVq1yJ07N4GBgQwaNAibzcaCBQviNf866zteoUIFKlSocNdjXF1dee+99+jRowf169enY8eO9ilNChcuzJAhQ+zHTpgwgebNm1OnTh169uxJWFgYH3/8MY8//rjDvaxfvz79+vVjwoQJhISE0KRJE1xdXTly5AhLlixh6tSptGnT5r6uRSTFOWPIrWReiU1pktC0FXHTFPxbQlOaGGNNYfDee++Zxx9/3Li7u5vcuXObKlWqmPHjx5vw8HD7cdwxrcW/Xbhwwbz88sumYMGCxtXV1fj6+ppGjRqZ2bNn24+Jm+5iyZIlDu+Nm47izilFfvnlF/PUU0+ZnDlzmuzZs5vy5cubjz/+2OGYY8eOmW7duhlfX1/j6upqHn30UfPss8+ab7/9NsE4E5KcKU0ef/zxeMcmdn8Tum9JuV+JiY6ONuPHjzdFihQxrq6upmDBgmb06NEO014Yc/9Tmtzt2ISu4dixY6Zx48bG3d3d5M+f37z22mtm7dq18e6XMcZMmzbNBAQEGHd3d1OtWjWzdetWU6VKFdOsWTOH4/766y/TvHlz4+npafLmzWuGDRtmli5dagCHqWISm9Lk/fffTzD2sWPHOmz75ptvTKlSpYy7u7spW7asWblypWndurUpVarUPe5U4r//OFu3bjU1atQwnp6ext/f34wcOdI+tc2d9+Vhf8fv9v9snDunNImzaNEiU6lSJePu7m58fHxM586dzenTp+O9f+nSpaZ06dLG3d3dlClTxixbtize7yfO7NmzTZUqVYynp6fJmTOnKVeunBk5cqQ5e/as/RhNaSLOYjMmlXrfiohkILGxseTNm5dWrVrds3l1ypQpDBkyhNOnTz+00cwAFStWJG/evA80hYuIpF/qUycicg83b96M1/z4xRdfEBYWFm+ZsBs3bsR776effspjjz2WYglddHR0vGbioKAg9u7dGy8eEck81KdOROQefv31V4YMGULbtm3JkycPu3fv5vPPP6ds2bK0bdvW4dhWrVpRqFAhKlasSHh4OF9++SWHDh1KdCqM5Dhz5gyNGzemS5cu+Pv7c+jQIWbNmoWvr2+8SbxFJPNQUicicg+FCxemYMGCTJs2zT6oolu3bkycONFhZRGwRsB+9tlnLFy4kJiYGMqUKcM333xD+/btUyye3LlzU6VKFT777DP+/vtvsmfPTvPmzZk4cSJ58uRJsc8RkfRFfepEREREMgD1qRMRERHJAJTUiYiIiGQA6lOHNTXB2bNnyZkzp9PXoxQREZGkMcZw9epV/P39cXFJ2XqqcePGsWLFCkJCQlL0vA+Tkjrg7Nmz8RZ9FxERkfTh1KlTFChQwNlhOJ2SOiBnzpyA9aXw8vJycjQiIpKZrV27ll9//ZWKFSvSpUsXFi5cyLPPPmvfHx4eTrdu3QgMDKRcuXJcuXKFUaNGERMTw6ZNm+Kdb9SoURw7doy1a9eyZcsWypcvn+hnz507lxIlSlCgQAEuX77MxIkT2b9/P/v27bOvCzx9+nSmT5/O22+/TZUqVbh+/TonT560r+ucmiIiIihYsKD9OZ7pOXM5i7QiPDzcAA7LSYmIiDg
bYJYvX37P43777TcDmBMnTjhs//HHH02pUqXMgQMHDGD27NlzX5+/d+9eA5ijR48aY4wJCwsznp6eZt26dUk+R2xsrBk7dqwpWLCgcXNzM35+fmbgwIH2/Tdv3jTDhg0z/v7+Jlu2bKZatWoJLkdXv3594+npaXLlymWaNGliwsLC7M/vfv36mbx58xp3d3dTu3Zt89tvv9nfG7dU4rp16+xLvNWsWdMcOnTI4TMmTJhg8uXLZ3LkyGF69uxpRo0aZSpUqOBwnieeeMJky5bNeHt7m1q1apnjx4/fx918+DRQQkREJJ0LDw/HZrORK1cu+7YLFy7Qp08fFixYQLZs2e77nNeuXWPu3LkUKVLE3kVp7dq1xMbGcubMGUqXLk2BAgVo164dp06dSvQ8S5cu5aOPPuLTTz/lyJEjrFixgnLlytn3DxgwgO3bt/PNN9+wb98+2rZtS7NmzThy5AgAISEhNGrUiDJlyrB9+3Z++eUXWrRoQUxMjP0cK1euZP78+ezevZvixYvTtGlTwsLCHOJ4/fXXmTx5Mjt37iRr1qz07NnTvm/x4sWMGzeOd999l507d+Ln58cnn3xi33/79m1atmxJ/fr12bdvH9u3b6dv375prx++MzPKTZs2mWeffdb4+fnF+9fIrVu3zMiRI03ZsmVNtmzZjJ+fn+natas5c+aMwzkCAgIM4PCaMGHCfcWhmjoREUmL7nw2JuTGjRumcuXKplOnTvZtsbGxplmzZubtt982xhgTGhqa5Jq6GTNmmOzZsxvAlCxZ0l5LZ4xVm+Xq6mpKlixpVq9ebbZv324aNWpkSpYsaaKiohI83+TJk02JEiXMrVu34u07ceKEyZIlS7xne6NGjczo0aONMcZ07NjR1K5dO8Fznz171gDms88+s2+7deuW8ff3N5MmTTLGONbUxVm1apUBzI0bN4wxxtSsWdO89NJLDueuXr26vabu0qVLBjBBQUEJxpFWOLWm7tq1a1SoUIEZM2bE23f9+nV2797Nm2++ye7du1m2bBmHDx/mueeei3fsW2+9xblz5+yvgQMHpkb4IiIiThUdHU27du0wxjBz5kz79o8//pirV68yevTo+z5n586d2bNnD5s2baJEiRK0a9eOmzdvAtZsEdHR0UybNo2mTZtSo0YNvv76a44cOcLGjRsTPF/btm25ceMGRYsWpU+fPixfvty+dvH+/fuJiYmhRIkS5MiRw/7atGkTx44dA/5XU5eQ0NBQAKpXr27f5urqSrVq1Th48KDDsf/uS+jn5wfAxYsXATh48KDDOQBq1qxp/9nHx4fu3bvTtGlTWrRowdSpUzl37tw97mTqc+pAiaeffpqnn346wX3e3t6sXbvWYdv06dOpVq0aJ0+epFChQvbtOXPmxNfX96HGKiIikpbEJXQnTpxgw4YNDgP9NmzYwPbt23F3d3d4T9WqVencuTPz589P9Lze3t54e3vz2GOPUaNGDXLnzs3y5cvp2LGjPRkqU6aM/fi8efPyyCOPcPLkyQTPV7BgQQ4fPsy6detYu3YtL730Eu+//z6bNm0iMjKSLFmysGvXLvtAjDg5cuQAwNPT8/5uTCJcXV3tP8c1m8bGxib5/XPnzmXQoEGsXr2aRYsW8cYbb7B27Vpq1KiRIvGlhHTVpy6hPgOAfb3DSpUq8f7779v/BZCYqKgoIiIiHF4iIiLpRVxCd+TIEdatWxdvzd9p06axd+9eQkJCCAkJ4ccffwRg0aJF/Oc//0ny5xhjMMYQFRUFQO3atQE4fPiw/ZiwsDD++ecfAgICEj2Pp6cnLVq0YNq0aQQFBbF9+3b2799PpUqViImJ4eLFixQvXtzhFVdZU758edavX5/geYsUKQJAcHCww73ZsWOHQ+J5L6VLl3Y4B8Cvv/4a77hKlSoxevRotm3bRtmyZfnqq6+S/BmpId1MaXLz5k1GjRpFx44dHf41MmjQICpXroyPjw/btm1j9OjRnDt3jg8//DDRc02YMIHx48enRtgiIiL3JTIykqNHj9rLoaGhhISE4OPjQ6FChYiOjqZNmzbs3r2bH374gZiYGM6fPw9YzYRubm4OrVnwv1qvYsWK2edzO3PmDI0aNeKLL76gWrVq/PXXXyxatIgmTZqQN29eTp8+zcSJE/H09LRPV1KiRAmef/55Bg8ezOzZs/Hy8mL06NGUKlWKhg0bJng98+bNIyYmhurVq5MtWza+/PJLPD09CQgIIE+ePHTu3Jlu3boxefJkKlWqxN9//8369espX748zZs3Z/To0ZQrV46XXnqJ/v374+bmxsaNG2nbti3Zs2cH4M033+TRRx+lUKFCTJo0ievXr9OrV68k3/PBgwfTvXt3qlatSu3atVm4cCEHDhygaNGi9t/B7Nmzee655/D39+fw4cMcOXKEbt26JfkzUoWT+/TZcZfOoLdu3TItWrQwlSpVuudghs8//9xkzZrV3Lx5M9Fjbt68acLDw+2vU6dOaaCEiIikCXEd++98BQYGGmP+N+ghodedU4HESWigRNy2uPecOXPGPP300yZfvnzG1dXVFChQwHTq1Cne1B/h4eGmZ8+eJleuXMbHx8e88MIL5uTJk4lez/Lly0316tWNl5eXyZ49u6lRo4bDoIVbt26ZMWPGmMKFCxtXV1fj5+dnXnjhBbNv3z77MUFBQaZWrVrG3d3d5MqVyzRt2tRcvnzZYUqTRx555K5Tmly+fNm+bc+ePQYwoaGh9m3/+c9/zCOPPGJy5MhhAgMDzciRI+0DJc6fP29atmxp/Pz8jJubmwkICDBjxowxMTExiV63M9iMMSa1E8mE2Gw2li9fTsuWLR22x1Ux//XXX2zYsCFeFfOdDhw4QNmyZTl06BAlS5ZM0mdHRETg7e1NeHi4Jh8WERFJJ/T8dpSmm1//3Wdg48aN90zowBol4+LiQr58+VIhQhEREUlMTAxs2QLnzoGfH9StC3eMh5AU5NSk7m79Bvz8/O7ZZ2D79u0EBwfTsGFDcubMyfbt2xkyZAhdunQhd+7czrosERGRTG/ZMhg8GE6f/t+2AgVg6lRo1cp5cWVkTm1+DQoKSrBjZWBgIOPGjbOParnTxo0badCgAbt37+all17i0KFDREVFUaRIEbp27crQoUPjDeO+G1XfioiIpJxly6BNG7gzw4hbgOHbb1MmsdPz21Ga6VPnTPpSiIiIpIyYGChc2LGG7t9sNqvGLjT0wZti9fx2lK7mqRMREZG0bcuWxBM6sGrvTp2yjpOUpaROREREUkxSV89Kg6tspXtK6kRERCTF/P9KYil2nCSdkjoRERFJMZcv/29AREJsNihY0JreRFKWkjoRERF5YLduwdCh1qjWuCGYdyZ3ceUpUzRf3cOgpE5EREQeSGgo1KkDH31klV95BRYtgkcfdTyuQIGUm85E4kvTK0qIiIhI2rZsGfTsCeHhkCsXzJsHzz9v7WvdWitKpCYldSIiInLfoqJg+HCYPt0q16gB33wDAQH/OyZLFmjQwCnhZUpqfhUREZH7cvQo1Kr1v4RuxAjYvNkxoZPUp5o6ER
ERSbJFi6BPH7h6FfLkgfnzoXlzZ0cloJo6ERERSYIbN6B/f+jQwUro6tSBkBAldGmJkjoRERG5q8OHrT5zn35qTUvy2muwcaM1mlXSDjW/ioiISKK+/NKqobt2DfLmtcpNmjg7KkmIaupEREQknuvXoVcv6NrVSugaNIC9e5XQpWVK6kRERMTBH39AtWrw3/9aza1jx8K6dVqvNa1T86uIiIgA1vJe8+bByy9bAyN8fWHhQnjySWdHJkmhpE5ERESIjISXXoIFC6xy48ZW/7n8+Z0blySdml9FREQyuX374IknrITOxQXeeQfWrFFCl96opk5ERCSTMgbmzIHBg+HmTfD3h6+/hnr1nB2ZJIeSOhERkUwoIgL69bPWawVo1gy++MKatkTSJzW/ioiIZDJ79kCVKlZClyULvPcerFqlhC69U02diIhIJmEMzJwJQ4bArVtQsKCV2NWq5ezIJCUoqRMREckEwsOhd2/49lur3KKFNX2Jj49Tw5IUpOZXERGRDG7HDqhUyUroXF3hww/hu++U0GU0qqkTERHJoIyBadNgxAiIjobChWHRImu1CMl4lNSJiIhkQGFh0LOnVSMH0KoVfP455Mrl1LDkIVLzq4iISAbz669Wc+t334GbG3z8sdX0qoQuY1NSJyIikkHExsL770PdunDyJBQrBtu2wYABYLM5Ozp52NT8KiIikgH88w90727NNwfQrp21WoSXl1PDklSkmjoREZF07pdfoGJFK6Fzd4dZs6z555TQZS5K6kRERNKp2FiYMAEaNIAzZ6BECQgOtpb/UnNr5qPmVxERkXTo4kXo2hV+/tkqd+lirRaRI4dz4xLnUVInIiKSzgQFQadOcO4ceHrC9OnQo4dq5zI7Nb+KiIikEzEx8NZb0KiRldCVLm2tFtGzpxI6UU2diIhIunD+PHTuDBs2WOUePaz557Jnd25cknYoqRMREUnj1q2zErqLFyFbNmt0a9euzo5K0ho1v4qIiKRRt2/Dm29CkyZWQleuHOzapYROEqaaOhERkTTozBlrMMTmzVa5Tx+YOtUaGCGSECV1IiIiaczq1VZt3D//WFOUzJ4NHTs6OypJ69T8KiIikkZER8Orr8LTT1sJXcWKsHu3EjpJGtXUiYiIpAEnT1rJ27ZtVvnll+GDD8DDw7lxSfqhpE5ERMTJvv8euneHsDBrvdbPP4c2bZwdlaQ3an4VERFxklu3YNgweO45K6GrWhX27FFCJ8mjpE5ERMQJQkOhbl348EOr/Mor8MsvULSoU8OSdEzNryIiIqls+XJrRYjwcMiVC+bNg+efd3ZUkt6ppk5ERCSVREXBoEHQqpWV0NWoASEhSugkZSipExERSQVHj0KtWtZ6rQDDh1sTCwcEODcuyTjU/CoiIvKQLV4MvXvD1auQJw/Mnw/Nmzs7KsloVFMnIiLykNy4AS++CO3bWwldnTpWc6sSOnkYlNSJiIg8BIcPW33mZs2yyqNHw8aNUKCAc+OSjEvNryIiIils4ULo1w+uXYO8eWHBAmja1NlRSUanmjoREZEUcv261XeuSxcroWvQwGpuVUInqUFJnYiISAr44w+oVs1a4stmgzFjYN068Pd3dmSSWTg1qdu8eTMtWrTA398fm83GihUr7Puio6MZNWoU5cqVI3v27Pj7+9OtWzfOnj3rcI6wsDA6d+6Ml5cXuXLlolevXkRGRqbylYiISGY2bx488QQcOAD581vJ3PjxkCWLsyOTzMSpSd21a9eoUKECM2bMiLfv+vXr7N69mzfffJPdu3ezbNkyDh8+zHPPPedwXOfOnTlw4ABr167lhx9+YPPmzfTt2ze1LkFERDKxyEgIDLRWh7h+HRo3hr174cknnR2ZZEY2Y4xxdhAANpuN5cuX07Jly0SP2bFjB9WqVePEiRMUKlSIgwcPUqZMGXbs2EHVqlUBWL16Nc888wynT5/GP4l13hEREXh7exMeHo6Xl1dKXI6IiGRw+/dDu3Zw6BC4uFg1c6NHq3YuNen57Shd9akLDw/HZrORK1cuALZv306uXLnsCR1A48aNcXFxITg4ONHzREVFERER4fASERFJCmPgs8+s/nOHDll95jZsgDfeUEInzpVukrqbN28yatQoOnbsaM/Gz58/T758+RyOy5o1Kz4+Ppw/fz7Rc02YMAFvb2/7q2DBgg81dhERyRiuXoXOnaFPH7h5E5o1s0a31q/v7MhE0klSFx0dTbt27TDGMHPmzAc+3+jRowkPD7e/Tp06lQJRiohIRrZnD1SuDF9/bdXITZwIq1ZZ89CJpAVpfvLhuITuxIkTbNiwwaHN3NfXl4sXLzocf/v2bcLCwvD19U30nO7u7ri7uz+0mEVEJOMwBmbOhKFDISoKChaEb76BWrWcHZmIozRdUxeX0B05coR169aRJ08eh/01a9bkypUr7Nq1y75tw4YNxMbGUr169dQOV0REMpjwcGswxMsvWwldixZWjZ0SOkmLnFpTFxkZydGjR+3l0NBQQkJC8PHxwc/PjzZt2rB7925++OEHYmJi7P3kfHx8cHNzo3Tp0jRr1ow+ffowa9YsoqOjGTBgAB06dEjyyFcREZGE7NwJ7dvDX39B1qzw3nswZIg1sbBIWuTUKU2CgoJo2LBhvO2BgYGMGzeOIkWKJPi+jRs30qBBA8CafHjAgAF8//33uLi40Lp1a6ZNm0aOHDmSHIeGRIuISBxjYNo0GDECoqMhIAAWLQI1AKU9en47SjPz1DmTvhQiIgJw+TL07AlxCxy98IK17Ffu3E4NSxKh57ejNN2nTkREJLUEB0OlSlZC5+Zm1dYtXaqETtIPJXUiIpKpxcbC5MlQpw6cOAFFi8K2bTBwoPrPSfqS5qc0EREReVguXbLWbl21yiq3awezZ4O3t3PjEkkO1dSJiEimtHUrVKxoJXTu7tZcdN98o4RO0i8ldSIikqnExsKECdbSXqdPQ4kSVn+6/v3V3Crpm5pfRUQk07h4Ebp1gzVrrHLnzlYNXc6czo1LJCUoqRMRkUxh0ybo2BHOnQNPT/j4Y2v6EtXOSUah5lcREcnQYmLgrbfgySethK50afjtN+jVSwmdZCyqqRMRkQzr/Hno0gXWr7fK3bvD9OmQPbtTwxJ5KJTUiYhIhrR+vdVn7sIFyJbN6jvXrZuzoxJ5eNT8KiIiGcrt2zBmDDz1lJXQlS0LO3cqoZOMTzV1IiKSYZw9aw2G2LzZKvfpA1OnWgMjRDI6JXUiIpIhrF4NXbvCP/9AjhzWyhAdOzo7KpHUo+ZXERFJ127fhtGj4emnrYSuYkXYtUsJnWQ+qqkTEZF069QpK3nbutUqv/QSTJ4MHh7OjUvEGZTUiYhIuvTDDxAYCGFh4OUFn30Gbds6OyoR51Hzq4iIpCu3bsGwYdCihZXQVakCu3croRNRTZ2IiKQbx49Dhw4QHGyVBw+G994Dd3enhiWSJiipExGRdGHFCujRA65cgVy5YO5caNnSuTGJpCVqfhURkTQtKsqqkXvhBSuhq14d9uxRQidyJyV1IiKSZh07BrVrw7RpVnn4c
NiyBQoXdmpYImmSml9FRCRNWrIEeveGiAjw8YEvvoDmzZ0dlUjapZo6ERFJU27etOaba9fOSuhq14aQECV0IveipE5ERNKMP/+EGjVg5kyrPHo0BAVBwYJODUskXVDzq4iIpAlffQX9+kFkJOTNCwsWQNOmzo5KJP1QTZ2IiDjV9evQpw907mwldPXrW82tSuhE7o+SOhERcZqDB60pSj77DGw2GDMG1q0Df39nRyaS/qj5VUREnGL+fGtAxPXrkD8/LFwIjRo5OyqR9Es1dSIikqquXYPu3a3X9etWIhcSooRO5EEpqRMRkVTz++9QtapVS+fiAm+/DWvWgK+vsyMTSf/U/CoiIg+dMfD55zBwoDUPnb+/Ndq1fn1nRyaScSipExGRh+rqVejf30riAJo1s1aHyJvXuXGJZDRqfhURkYcmJMRqbv3qK8iSBSZOhFWrlNCJPAyqqRMRkRRnDMyaBUOGQFQUFCgA33xjLfklIg+HkjoREUlR4eHQty8sXmyVn30W5s2DPHmcGpZIhqfmVxERSTE7d0LlylZClzUrTJ4MK1cqoRNJDaqpExGRB2YMfPwxDB8O0dEQEACLFlmrRYhI6lBSJyIiD+TyZejVC5Yvt8otW8J//wu5czs1LJFMR82vIiKSbMHBUKmSldC5ucG0abBsmRI6EWdQUiciIvfNGKu/XJ06cOIEFC0K27ZZkwvbbM6OTiRzUvOriIjcl0uXrHVbf/jBKrdtC3PmgLe3U8MSyfRUUyciIkm2davV3PrDD+DuDjNnWgMilNCJOJ+SOhERuafYWGs1iPr14dQpeOwx+PVXa/kvNbeKpA1qfhURkbv6+2/o1g1Wr7bKnTpZq0XkzOncuETEkZI6ERFJ1ObN0LEjnD0LHh4wfTr07KnaOZG0SM2vIiIST0wMvPMONGxoJXSlSsGOHdZ8dEroRNIm1dSJiIiD8+ehSxdYv94qBwbCjBmQPbtz4xKRu1NSJyIiduvXQ+fOcOECZMsGn3xiJXUikvap+VVERIiJgbFj4amnrISubFmruVUJnUj6oZo6EZFM7uxZa0Trpk1WuXdvmDrVqqkTkfRDSZ2ISCa2Zg107WpNW5IjB3z6qZXgiUj6o+ZXEZFM6PZtGD0amjWzEroKFWDXLiV0IumZaupERDKZU6esuee2brXKL74IH35ozUMnIumXU2vqNm/eTIsWLfD398dms7FixQqH/cuWLaNJkybkyZMHm81GSEhIvHM0aNAAm83m8Orfv3/qXICISDqzahVUrGgldF5e1rqtn3yihE4kI3BqUnft2jUqVKjAjBkzEt1fp04d3nvvvbuep0+fPpw7d87+mjRp0sMIV0Qk3YqOhhEj4NlnISwMqlSB3buhXTtnRyYiKSVZza+hoaFs2bKFEydOcP36dfLmzUulSpWoWbMmHvfxz72nn36ap59+OtH9Xbt2BeD48eN3PU+2bNnw9fVN8ueKiGQmJ05A+/YQHGyVBw2CSZPA3d25cYlIyrqvpG7hwoVMnTqVnTt3kj9/fvz9/fH09CQsLIxjx47h4eFB586dGTVqFAEBAQ8r5gTj+vLLL/H19aVFixa8+eabZLvLWPyoqCiioqLs5YiIiNQIU0Qk1a1YAT16wJUrkCsX/Pe/8MILTg5KRB6KJCd1lSpVws3Nje7du7N06VIKFizosD8qKort27fzzTffULVqVT755BPatm2b4gHfqVOnTgQEBODv78++ffsYNWoUhw8fZtmyZYm+Z8KECYwfP/6hxyYi4ixRUTBqlDXfHEC1alb/ucKFnRqWiDxENmOMScqBa9asoWnTpkk66aVLlzh+/DhVqlRJeiA2G8uXL6dly5bx9h0/fpwiRYqwZ88eKlaseNfzbNiwgUaNGnH06FGKFSuW4DEJ1dQVLFiQ8PBwvLy8khyziEha9NdfVl+5Xbus8rBh8O674Obm3LhEUlpERATe3t56fv+/JNfUJTWhA8iTJw958uRJVkAPqnr16gB3Terc3d1xV2cSEcmAvv0WevWCiAjw8YH5863BESKS8SVr9Ovu3bvZv3+/vfzdd9/RsmVLXnvtNW7dupViwSVH3LQnfn5+To1DRCQ13bwJL78MbdtaCV3t2hASooROJDNJVlLXr18//vzzTwD++usvOnToQLZs2ViyZAkjR45M8nkiIyMJCQmxJ2KhoaGEhIRw8uRJAMLCwggJCeGPP/4A4PDhw4SEhHD+/HkAjh07xttvv82uXbs4fvw4K1eupFu3btSrV4/y5csn59JERNKdI0egZk1rvjmAV1+FjRvhjq7PIpLRmWTw8vIyR48eNcYYM3HiRNOkSRNjjDG//PKLKVCgQJLPs3HjRgPEewUGBhpjjJk7d26C+8eOHWuMMebkyZOmXr16xsfHx7i7u5vixYubESNGmPDw8Pu6nvDwcAPc9/tERJztq6+MyZHDGDDmkUeM+eknZ0ckknr0/HaUrHnqjDHExsYCsG7dOp79//r9ggUL8s8//yT5PA0aNMDcZZxG9+7d6d69e6L7CxYsyKZNm5L8eSIiGcWNGzB4MMyZY5Xr1YOvvoJHH3VuXCLiPMlqfq1atSrvvPMOCxYsYNOmTTRv3hywmk/z58+fogGKiIijQ4esKUrmzAGbDd58E9avV0Inktklq6ZuypQpdO7cmRUrVvD6669TvHhxAL799ltq1aqVogGKiMj/fPEFvPgiXL8O+fPDl19C48bOjkpE0oIkz1OXFDdv3iRLliy4urqm1ClThea5EZG07to1GDAA5s2zyk8+CQsXglZIlMxMz29HyWp+TYyHh0e6S+hERNK6Awes5tZ588DFBd56C37+WQmdiDhKcvNr7ty5sdlsSTo2LCws2QGJiIjFGGut1oEDrYERfn7w9ddQv76zIxORtCjJSd2UKVPsP1+6dIl33nmHpk2bUrNmTQC2b9/OmjVrePPNN1M8SBGRzObqVavv3MKFVrlpU6s/Xb58zo1LRNKuZPWpa926NQ0bNmTAgAEO26dPn866detYsWJFSsWXKtQmLyJpyd691tqtf/4JWbLAO+/AyJFW06uI/I+e346S9SdizZo1NGvWLN72Zs2asW7dugcOSkQkMzIGZs2C6tWthK5AAQgKslaIUEInIveSrD8TefLk4bvvvou3/bvvviNPnjwPHJSISGYTEQEdOlhNrlFR0Ly5tXZrnTrOjkxE0otkzVM3fvx4evfuTVBQENWrVwcgODiY1atXMyduenMREUmSXbugfXs4dgyyZoWJE2HIENXOicj9SVZS1717d0qXLs20adNYtmwZAKVLl+aXX36xJ3kiInJ3xsD06TB8ONy6BQEB8M03UKOGsyMTkfQoRScfTq/U0VJEUtuVK9CrF/z/v4tp2dKaviR3bmdGJZK+6PntKFk1dQCxsbEcPXqUixcvEhsb67CvXr16DxyYiEhG9dtvVnPr8ePg6goffGDNRZfEqUBFRBKUrKTu119/pVOnTpw4cYI7K/psNhsxMTEpEpyISEZiDHz0EYwaBbdvQ9GisGgRVK3q7MhEJCNIVlLXv39/
qlatyqpVq/Dz80vyShMiIplVWBh07w7ff2+V27SBzz4Db2+nhiUiGUiykrojR47w7bffUrx48ZSOR0Qkw9m2zZqu5NQpcHe3auv691dzq4ikrGQNmK9evTpHjx5N6VhERDKU2Fh47z2oV89K6B57DH791ZqLTgmdiKS0ZNXUDRw4kGHDhnH+/HnKlSuHq6urw/7y5cunSHAiIunV339DYCD89JNV7tgRPv0UcuZ0blwiknEla0oTlwRmxLTZbBhj0uVACQ2JFpGUtHmzlcSdPQseHvDxx9b0JaqdE0lZen47SlZNXWhoaErHISKS7sXEwIQJMHas1fRaqhQsXgzlyjk7MhHJDJKV1AUEBKR0HCIi6dqFC9ClC6xbZ5W7dYMZMyBHDufGJSKZR7InHz527BhTpkzh4MGDAJQpU4bBgwdTrFixFAtORCQ92LABOneG8+chWzb45BOrP52ISGpK1ujXNWvWUKZMGX777TfKly9P+fLlCQ4O5vHHH2ft2rUpHaOISJoUE2M1tTZubCV0jz8OO3YooRMR50jWQIlKlSrRtGlTJk6c6LD91Vdf5eeff2b37t0pFmBqUEdLEblfZ89atXNBQVa5d2+YOtWqqROR1KHnt6Nk1dQdPHiQXr16xdves2dP/vjjjwcOSkQkLfv5Z6hY0UrocuSAhQthzhwldCLiXMlK6vLmzUtISEi87SEhIeTLl+9BYxIRSZNu34bXXoOmTa156CpUgF27oFMnZ0cmIpLMgRJ9+vShb9++/PXXX9SqVQuArVu38t577zF06NAUDVBEJC04fdqae+6XX6xy//7Wcl8eHs6NS0QkTrL61BljmDJlCpMnT+bs2bMA+Pv7M2LECAYNGoQtnc2wqTZ5EbmbH3+0pii5dMlaEeKzz6BdO2dHJSJ6fjtKVlL3b1evXgUgZzpe+0ZfChFJSHQ0vP46vP++Va5c2ZpMWDM3iaQNen47SvaKErdv3+axxx5zSOaOHDmCq6srhQsXTqn4RESc4sQJ6NABfv3VKg8caCV37u7OjUtEJDHJGijRvXt3tm3bFm97cHAw3bt3f9CYRESc6rvvoFIlK6HLlQuWLYNp05TQiUjalqykbs+ePdSuXTve9ho1aiQ4KlZEJD24dQteeQVatoTLl6FaNdizB154wdmRiYjcW7KSOpvNZu9L92/h4eHExMQ8cFAiIqntr7+gdm1rAmGAYcNgyxZQbxIRSS+SldTVq1ePCRMmOCRwMTExTJgwgTp16qRYcCIiqWHpUqu5dedO8PGBlSvhgw/Azc3ZkYmIJF2yBkq899571KtXj5IlS1K3bl0AtmzZQkREBBs2bEjRAEVEHpabN2H4cJgxwyrXqgVffw2FCjk3LhGR5EhWTV2ZMmXYt28f7dq14+LFi1y9epVu3bpx6NAhypYtm9IxioikuCNHrCQuLqEbNcpa9ksJnYikVw88T11GoHluRDKXb76BPn0gMhIeeQQWLIBmzZwdlYjcLz2/HSWrpg6s5tYuXbpQq1Ytzpw5A8CCBQv4JW4NHRGRNObGDejXz1ruKzIS6tWDkBAldCKSMSQrqVu6dClNmzbF09OT3bt3ExUVBVijX999990UDVBEJCUcOgTVq8Ps2WCzwRtvwPr18Oijzo5MRCRlJCupe+edd5g1axZz5szB1dXVvr127drs3r07xYITEUkJCxZA1aqwfz/kzw8//wxvvw1ZkzVUTEQkbUpWUnf48GHq1asXb7u3tzdXrlx50JhERFLEtWvQsyd062b9/OSTVnNr48bOjkxEJOUlK6nz9fXl6NGj8bb/8ssvFC1a9IGDEhF5UAcOWCtCzJ0LLi4wfrxVQ+fr6+zIREQejmQldX369GHw4MEEBwdjs9k4e/YsCxcuZPjw4bz44ospHaOISJIZA//9LzzxBPzxB/j5WX3nxoyBLFmcHZ2IyMOTrB4lr776KrGxsTRq1Ijr169Tr1493N3dGT58OAMHDkzpGEVEkiQyEl58Eb780io3aWL1p8uXz7lxiYikhgeap+7WrVscPXqUyMhIypQpQ44cOVIytlSjeW5E0r99+6BtW/jzT6tG7u23rQmFXZI9cZOIpHV6fjt6oD93bm5ulClThlKlSrFu3ToOHjyYUnGJiCSJMfDpp1b/uT//tKYoCQqC0aOV0IlI5pKsP3nt2rVj+vTpANy4cYMnnniCdu3aUb58eZYuXZqiAYqIJCYiwppIuH9/iIqC5s2t0a116jg7MhGR1JespG7z5s3UrVsXgOXLlxMbG8uVK1eYNm0a77zzTooGKCKSkN27oXJlWLTImm/u/fdh5Upr2S8RkcwoWUldeHg4Pj4+AKxevZrWrVuTLVs2mjdvzpEjR1I0QBGRfzMGpk+HmjXh2DEICIAtW2D4cDW3ikjmlqw/gQULFmT79u1cu3aN1atX06RJEwAuX76Mh4dHigYoIhLnyhVo0wYGDoRbt+D552HPHqhRw9mRiYg4X7KmNHnllVfo3LkzOXLkICAggAYNGgBWs2y5cuVSMj4REQB++w3at4fjx8HV1WpuHTTIWsdVRESSmdS99NJLVK9enZMnT/LUU0/h8v9tHkWLFlWfOhFJUcbAlCnW9CTR0VCkiNWP7oknnB2ZiEja8kDz1GUUmudGJG0KC4MePawBEGA1vX72GXh7OzcuEUkb9Px2lOQ+dRMnTuTGjRtJOjY4OJhVq1bd87jNmzfTokUL/P39sdlsrFixwmH/smXLaNKkCXny5MFmsxESEhLvHDdv3uTll18mT5485MiRg9atW3PhwoUkxSkiadf27VCxopXQubnBjBmweLESOhGRxCQ5qfvjjz8oVKgQL730Ej/99BN///23fd/t27fZt28fn3zyCbVq1aJ9+/bkzJnznue8du0aFSpUYMaMGYnur1OnDu+9916i5xgyZAjff/89S5YsYdOmTZw9e5ZWrVol9bJEJI2JjYVJk6BuXTh1CooXh19/hZdeUv85EZG7ua/m17179zJ9+nS+/fZbIiIiyJIlC+7u7ly/fh2ASpUq0bt3b7p3737fo2BtNhvLly+nZcuW8fYdP36cIkWKsGfPHipWrGjfHh4eTt68efnqq69o06YNAIcOHaJ06dJs376dGkkcEqfqW5G04e+/ITAQfvrJKnfsaK0WkYR/I4pIJqTnt6P7GihRoUIF5syZw6effsq+ffs4ceIEN27c4JFHHqFixYo8ksqzfu7atYvo6GgaN25s31aqVCkKFSp016QuKiqKqKgoezkiIuKhxyoid7dlC3ToAGfPgocHTJsGvXurdk5EJKmSNfrVxcWFihUrOtSaOcP58+dxc3MjV65cDtvz58/P+fPnE33fhAkTGD9+/EOOTkSSIjYWJkyAMWOsn0uVsvrOaXYkEZH7kynnXx89ejTh4eH216lTp5wdkkimdOECNGsGb7xhJXTdusGOHUroRESSI1k1dWmFr68vt27d4sqVKw61dRcuXMDX1zfR97m7u+Pu7p4KEYpIYjZsgM6d4fx5yJbNGt3avbuzoxIRSb/SdU1dlSpVcHV1Zf369fZ
thw8f5uTJk9SsWdOJkYlIYmJiYNw4aNzYSugef9yqnVNCJyLyYJxaUxcZGcnRo0ft5dDQUEJCQvDx8aFQoUKEhYVx8uRJzp49C1gJG1g1dL6+vnh7e9OrVy+GDh2Kj48PXl5eDBw4kJo1ayZ55KuIpJ5z56zauY0brXKvXtaAiGzZnBuXiEhG8EA1dUePHmXNmjX2SYnvd3GKnTt3UqlSJSpVqgTA0KFDqVSpEmPGjAFg5cqVVKpUiebNmwPQoUMHKlWqxKxZs+zn+Oijj3j22Wdp3bo19erVw9fXl2XLlj3IZYnIQ7B2rTWZ8MaNkD07fPmltTqEEjoRkZSRrGXCLl26RPv27dmwYQM2m40jR45QtGhRevbsSe7cuZk8efLDiPWh0Tw3Ig/P7dtWc+u771rruJYvb41uLVnS2ZGJSHqn57ejZNXUDRkyhKxZs3Ly5Emy/euf2e3bt2f16tUpFpyIpG+nT8OTT8J//mMldP37W6tDKKETEUl5yepT9/PPP7NmzRoKFCjgsP2xxx7jxIkTKRKYiKRvP/5oTVFy6ZK1IsScOdC+vbOjEhHJuJJVU3ft2jWHGro4YWFhmipEJJOLjoaRI6F5cyuhq1wZdu9WQici8rAlK6mrW7cuX3zxhb1ss9mIjY1l0qRJNGzYMMWCE5H05eRJqF8f3n/fKg8cCNu2QfHizo1LRCQzSFbz66RJk2jUqBE7d+7k1q1bjBw5kgMHDhAWFsbWrVtTOkYRSQdWrrTmmrt8Gby94b//hVatnB2ViEjmkayaurJly/Lnn39Sp04dnn/+ea5du0arVq3Ys2cPxYoVS+kYRSQNu3ULhgyB55+3EronnoA9e5TQiYiktmRNaZLRaEi0SPKEhlp95XbssMpDh8KECeDm5ty4RCRz0PPbUbJXlLh58yb79u3j4sWLxMbGOux77rnnHjgwEUnbli2Dnj0hPBxy54b586FFC2dHJSKSeSUrqVu9ejXdunXjn3/+ibfPZrMRExPzwIGJSNp08yaMGAHTp1vlmjXhm2+gUCHnxiUiktklq0/dwIEDadu2LefOnSM2NtbhpYROJOM6ehRq1fpfQjdyJGzapIRORCQtSFZN3YULFxg6dCj58+dP6XhEJI1atAj69IGrV+GRR+CLL+Dpp50dlYiIxElWTV2bNm0ICgpK4VBEJC26ccNa3qtDByuhq1sXQkKU0ImIpDXJGv16/fp12rZtS968eSlXrhyurq4O+wcNGpRiAaYGjZ4RSdjhw9CuHezbBzYbvP46jB0LWZM9xEpEJOXo+e0oWX+av/76a37++Wc8PDwICgrCZrPZ99lstnSX1IlIfF9+adXQXbsG+fJZ5aeecnZUIiKSmGQlda+//jrjx4/n1VdfxcUlWS24IpJGXb8OAwbA3LlWuWFDWLgQ/PycG5eIiNxdsjKyW7du0b59eyV0IhnMgQPWihBz54KLC4wfD2vXKqETEUkPkpWVBQYGsmjRopSORUScxBgrkXviCfjjD/D1hfXrYcwYyJLF2dGJiEhSJKv5NSYmhkmTJrFmzRrKly8fb6DEhx9+mCLBicjDFxkJL70ECxZY5SZNrJ/z5XNuXCIicn+SldTt37+fSpUqAfD777877Pv3oAkRSdv27bNGtx4+bDW3vv02vPqq9bOIiKQvyUrqNm7cmNJxiEgqMgbmzIHBg61lvx59FL7+2pqDTkRE0ifNNiWSyUREQL9+1nqtAM88A/PnW6tEiIhI+pXkpK5Vq1bMmzcPLy8vWrVqdddjly1b9sCBiUjK27PHam49etSaQPjdd2HYMDW3iohkBElO6ry9ve395by9vR9aQCKS8oyBTz6BoUPh1i0oVMiqqatZ09mRiYhISrmvZcLeeusthg8fTrZs2R5mTKlOy4xIRnblCvTpA99+a5Wfe86avsTHx6lhiYg8MD2/Hd1Xo8v48eOJjIx8WLGISArbsQMqV7YSOldXmDIFVqxQQicikhHd10CJ+6jUExEnMgamToWRIyE6GooUgUWLrMmFRUQkY7rv0a+ah04kbQsLgx49YOVKq9y6NXz2GeTK5dSwRETkIbvvpK5EiRL3TOzCwsKSHZCIJN/27dChA5w8CW5u8OGH1moR+reYiEjGd99J3fjx4zX6VSSNiY2FyZPhtdfg9m0oXhwWL4b/X/hFREQygftO6jp06EA+LQopkmb88w8EBsKPP1rlDh3g009BA8FERDKX+xr9qv50ImnLli1QsaKV0Hl4WMncV18poRMRyYzuK6nT6FeRtCE21loNomFDOHMGSpaE4GDo21f950REMqv7an6NjY19WHGISBJdvAhdu8LPP1vlrl2t1SJy5HBuXCIi4lz33adORJwnKAg6dYJz58DTE2bMgO7dVTsnIiL32fwqIs4REwPjx0OjRlZCV6YM7NxpzUenhE5EREA1dSJp3rlz0KULbNhglXv2hI8/hgy2BLOIiDwgJXUiadjatVZCd/EiZM8Os2ZZZRERkTup+VUkDbp9G954A5o2tRK68uWt5lYldCIikhjV1ImkMWfOQMeO1hx0AP36wUcfWQMjREREEqOkTiQN+ekn6NbNWiUiZ06YPdtaIUJERORe1PwqkgZER8OoUfDMM1ZCV7ky7N6thE5ERJJONXUiTnbypNXcum2bVR4wAD74ANzdnRuXiIikL0rqRJzo++8hMBAuXwZvb/j8c2jd2tlRiYhIeqTmVxEnuHULhg2D556zEronnoA9e5TQiYhI8impE0lloaFQty58+KFVHjIEfvkFihRxblwiIpK+qflVJBUtW2atCBEeDrlzw7x5Vm2diIjIg1JNnUgqiIqCgQOt5tXwcKhZ02puVUInIiIpRUmdyEN29CjUqgXTp1vlkSNh0yYICHBuXCIikrGo+VXkIVq8GHr3hqtXIU8e+OILay46ERGRlKaaOpGH4MYN6N8f2re3Erq6dSEkRAmdiIg8PErqRFLY4cNQowZ8+inYbPD667BhAxQo4OzIREQkI1Pzq0gKWrgQ+vWDa9cgXz748kt46ilnRyUiIpmBaupEUsD161bfuS5drISuYUOruVUJnYiIpBanJnWbN2+mRYsW+Pv7Y7PZWLFihcN+YwxjxozBz88PT09PGjduzJEjRxyOKVy4MDabzeE1ceLEVLwKyez++AOqVbOW+LLZYNw4WLsW/PycHZmIiGQmTk3qrl27RoUKFZgxY0aC+ydNmsS0adOYNWsWwcHBZM+enaZNm3Lz5k2H49566y3OnTtnfw0cODA1whdh3jxria8DB8DXF9avh7FjIUsWZ0cmIiKZjVP71D399NM8/fTTCe4zxjBlyhTeeOMNnn/+eQC++OIL8ufPz4oVK+jQoYP92Jw5c+Lr65sqMYsAREbCyy9bU5SA1cy6YAHkz+/cuEREJPNKs33qQkNDOX/+PI0bN7Zv8/b2pnr16mzfvt3h2IkTJ5InTx4qVarE+++/z+3bt+967qioKCIiIhxeIkm1f79VO/fFF+DiAv/5D6xerYROREScK82Ofj1//jwA+e94UubPn9++D2DQoEFUrl
wZHx8ftm3bxujRozl37hwfxq2WnoAJEyYwfvz4hxO4ZFjGwGefwaBBcPMmPPoofP21NQediIiIs6XZpC6phg4dav+5fPnyuLm50a9fPyZMmIC7u3uC7xk9erTD+yIiIihYsOBDj1XSr4gIa6qSb76xyk8/bdXUPfKIc+MSERGJk2abX+P6yF24cMFh+4ULF+7af6569ercvn2b48ePJ3qMu7s7Xl5eDi+RxOzZA1WqWAldliwwaRL88IMSOhERSVvSbFJXpEgRfH19Wb9+vX1bREQEwcHB1KxZM9H3hYSE4OLiQr58+VIjTMnAjIFPPrFWhzh6FAoVgi1bYMQIqy+diIhIWuLU5tfIyEiOHj1qL4eGhhISEoKPjw+FChXilVde4Z133uGxxx6jSJEivPnmm/j7+9OyZUsAtm/fTnBwMA0bNiRnzpxs376dIUOG0KVLF3Lnzu2kq5KMIDzcmkz422+t8nPPwdy54OPj3LhEREQS49SkbufOnTRs2NBejuvnFhgYyLx58xg5ciTXrl2jb9++XLlyhTp16rB69Wo8PDwAqxn1m2++Ydy4cURFRVGkSBGGDBni0F9O5H7t3Ant2kFoKLi6Ws2tgwdbEwuLiIikVTZjjHF2EM4WERGBt7c34eHh6l+XiRkD06ZZzavR0VC4MCxebE1fIiIiaY+e347S/ehXkZRw+TL07AlxK9W1amUt+5UrlzOjEhERSTp195ZM79dfoVIlK6Fzc4Pp062+dEroREQkPVFSJ5lWbCx88IE1efCJE1CsGGzfbi3/pf5zIiKS3qj5VTKlf/6B7t1h1Sqr3L49zJ4N6pIhIiLplZI6yXR++QU6doTTp8Hd3Roc0aePaudERCR9U/OrZBqxsTBhAjRoYCV0JUvCb79B375K6EREJP1TTZ1kChcvQteu8PPPVrlLF5g5E3LkcG5cIiIiKUVJnWR4QUHQqROcOweenjBjhtWfTrVzIiKSkaj5VTKsmBh46y1o1MhK6MqUgR07oEcPJXQiIpLxqKZOMqTz56FzZ9iwwSr36AEffwzZszs3LhERkYdFSZ1kOOvWWX3mLlywkriZM63+dCIiIhmZml8lw7h9G958E5o0sRK6cuVg504ldCIikjmopk4yhDNnrMEQmzdb5X794KOPrIERIiIimYGSOkn3Vq+2auP++Qdy5rRWhujQwdlRiYiIpC41v0q6FR0Nr74KTz9tJXSVKsGuXUroREQkc1JNnaRLp05Zydu2bVb55Zfhgw/Aw8O5cYmIiDiLkjpJd77/3po8OCwMvL3h88+hdWtnRyUiIuJcan6VdOPWLRg2DJ57zkronngCdu9WQiciIgKqqZN04vhxaN8efvvNKr/yCrz3Hri5OTMqERGRtENJnaR5y5dDz55w5QrkygXz5sHzzzs5KBERkTRGza+SZkVFwaBB0KqVldDVqAEhIUroREREEqKkTtKkY8egdm1rvVaAkSOtiYUDApwbl4iISFql5ldJc5Ysgd69ISIC8uSBL76AZ55xdlQiIiJpm2rqJM24eRNeegnatbMSujp1rOZWJXQiIiL3pqRO0oQ//7T6zM2cCTYbvPYabNwIBQo4OzIREZH0Qc2v4nQLF0K/fnDtGuTNC19+CU2aODsqERGR9EU1deI0169bfee6dLESugYNYO9eJXQiIiLJoaROnOLgQahe3Vriy2aDsWNh3Trw83N2ZCIiIumTml8l1c2fbw2IuH4dfH2t5tcnn3R2VCIiIumbauok1Vy7BoGB0L27ldA1bmyNblVCJyIi8uCU1Emq2L8fqla15pxzcYF33oE1ayB/fmdHJiIikjGo+VUeKmOsfnMDB1rz0Pn7w9dfQ716zo5MREQkY1FSJw/N1avQvz989ZVVfvppqz9d3rzOjUtERCQjUvOrPBQhIVClipXQZckC770HP/yghE5ERORhUU2dpChjYNYsGDIEoqKgYEH45huoVcvZkYmIiGRsSuokxYSHQ58+sGSJVW7RAubNAx8fp4YlIiKSKaj5VVLEzp1QubKV0Lm6wocfwnffKaETERFJLaqpkwdiDHz8MQwfDtHRULgwLFoE1ao5OzIREZHMRUmdJNvly9CzJ6xYYZVbtbKmL8mVy5lRiYiIZE5qfpVkCQ6GSpWshM7Nzaqt+/ZbJXQiIiLOoqRO7osxMHky1KkDJ05AsWKwbRsMGAA2m7OjExERybzU/CpJdumStW7rDz9Y5fbtYfZs8PJyalgiIiKCkjpJoq1boUMHOH0a3N1h6lTo21e1cyIiImmFml/lrmJjYeJEqF/fSuhKlLD60/Xrp4ROREQkLVFNnSTq4kXo1g3WrLHKXbrAzJmQI4dz4xIREZH4lNRJgjZtgo4d4dw58PSE6dOhRw/VzomIiKRVan4VBzEx8Pbb8OSTVkJXujTs2GHNR6eETkREJO1STZ3YnT9vNbGuX2+Ve/Sw5p/Lnt25cYmIiMi9KakTwErkOneGCxcgWzaYNQu6dnV2VCIiIpJUan7N5GJiYMwYeOopK6ErVw527VJCJyIikt6opi4TO3sWOnWyBkWANe/clCnWwAgRERFJX5TUZVJr1lj95/75x5qiZM4ca3JhERERSZ/U/JrJ3L4No0dDs2ZWQlexIuzerYROREQkvXNqUrd582ZatGiBv78/NpuNFStWOOw3xjBmzBj8/Pzw9PSkcePGHDlyxOGYsLAwOnfujJeXF7ly5aJXr15ERkam4lWkH6dOQYMG1goRAC+/DNu3w2OPOTUsERERSQFOTequXbtGhQoVmDFjRoL7J02axLRp05g1axbBwcFkz56dpk2bcvPmTfsxnTt35sCBA6xdu5YffviBzZs307dv39S6hHRj1SqrVm7rVvDygiVLrAmFPTycHZmIiIikBJsxxjg7CACbzcby5ctp2bIlYNXS+fv7M2zYMIYPHw5AeHg4+fPnZ968eXTo0IGDBw9SpkwZduzYQdWqVQFYvXo1zzzzDKdPn8bf3z/Bz4qKiiIqKspejoiIoGDBgoSHh+Pl5fVwLzSV3boFr70Gkydb5apVYdEiKFrUuXGJiIg8qIiICLy9vTPk8zs50myfutDQUM6fP0/jxo3t27y9valevTrbt28HYPv27eTKlcue0AE0btwYFxcXgoODEz33hAkT8Pb2tr8KFiz48C7EiY4fh3r1/pfQvfIK/PKLEjoREZGMKM0mdefPnwcgf/78Dtvz589v33f+/Hny5cvnsD9r1qz4+PjYj0nI6NGjCQ8Pt79OnTqVwtE734oVUKkSBAdDrlxW+aOPwN3dyYGJiIjIQ5EppzRxd3fHPYNmN1FRMHIkTJtmlWvUgG++gYAA58YlIiIiD1earanz9fUF4MKFCw7bL1y4YN/n6+vLxYsXHfbfvn2bsLAw+zGZybFjULv2/xK6ESNg82YldCIiIplBmk3qihQpgq+vL+vjVpfH6hAZHBxMzZo1AahZsyZXrlxh165d9mM2bNhAbGws1atXT/WYnWnJEqhc2VriK08e+OEHmDQJXF2dHZmIiIikBqc2v0ZGRnL06FF7OTQ0l
JCQEHx8fChUqBCvvPIK77zzDo899hhFihThzTffxN/f3z5CtnTp0jRr1ow+ffowa9YsoqOjGTBgAB06dEh05GtGc/MmDB0KM2da5Tp14OuvoUAB58YlIiIiqcupSd3OnTtp2LChvTx06FAAAgMDmTdvHiNHjuTatWv07duXK1euUKdOHVavXo3HvyZXW7hwIQMGDKBRo0a4uLjQunVrpsW1P2ZwR45Au3YQEmKVR4+Gt96CrJmyp6SIiEjmlmbmqXOm9DjPzddfQ9++EBkJefPCggXQtKmzoxIREUk96fH5/TCl2T51krAbN6BPH+jUyUroGjSwauqSm9AFBQVhs9m4cuVKCkYpIiIiqS1NJ3VXr17llVdeISAgAE9PT2rVqsWOHTscjomMjGTAgAEUKFAAT09PypQpw6xZs+557iVLllCqVCk8PDzsAy8S079/f2w2G1OmTHmQy3lgBw9CtWrw2Wdgs8GYMbBuHWSS7oMiIiJyF2k6qevduzdr165lwYIF7N+/nyZNmtC4cWPOnDljP2bo0KGsXr2aL7/8koMHD/LKK68wYMAAVq5cmeh5t23bRseOHenVqxd79uyhefPmAPzxxx/xjl2+fDm//vqr0wdezJ9vLfH1+++QP7+VzI0fD1myODUsERERSSPSbFJ348YNli5dyqRJk6hXrx7Fixdn3LhxFC9enJlxQz2xErTAwEAaNGhA4cKF6du3LxUqVOC3335L9NxTp06lWbNmjBgxgtKlS/PGG28AMHv2bIfjzpw5w8CBA1m4cCGuSZgb5Ntvv6VcuXJ4enqSJ08eGjduzLVr1+z7P/vsM0qXLo2HhwelSpXik08+cXj/6dOn6dixIz4+PmTPnp2qVasSFBRM9+7QvTtcvz4TT89ihIW58eKLJVmwYIHD+202G5999hkvvPAC2bJl47HHHouX3P7444+UKFECT09PGjZsyPHjxx32nzhxghYtWpA7d26yZ8/O448/zo8//njPaxcRERHnSrNJ3e3bt4mJiXEY6Qrg6enJL7/8Yi/XqlWLlStXcubMGYwxbNy4kT///JMmTZokeu7t27c7rCkb599Nu7GxsXTt2pURI0bw+OOP3zPec+fO0bFjR3r27MnBgwcJCgqiVatWxI1DWbhwIWPGjOE///kPBw8e5N133+XNN99k/vz5gNWMXL9+fc6cOcPKlSvZu3cvHTqMpHv3WObPB5ttOVmyDGbSpGH8/vvv9OvXjx49erBx40aHOMaPH0+7du3Yt28fzzzzDJ07dyYsLAyAU6dO0apVK1q0aEFISAi9e/fm1VdfdXj/yy+/TFRUFJs3b2b//v2899575MiR457XLyIiIk5m0rCaNWua+vXrmzNnzpjbt2+bBQsWGBcXF1OiRAn7MTdv3jTdunUzgMmaNatxc3Mz8+fPv+t5XV1dzVdffWUvh4eHG8DkzZvXvu3dd981Tz31lImNjTXGGBMQEGA++uijRM+5a9cuA5jjx48nuL9YsWIOn2mMMW+//bapWbOmMcaYTz/91OTMmdNcunTJxMYa89lnxnh4GAPG+PsbU7ZsLdOnTx+H97dt29Y888wz9jJg3njjDXs5MjLSAOann34yxhgzevRoU6ZMGYdzjBo1ygDm8uXLxhhjypUrZ8aNG5fodYqIiKQVcc/v8PBwZ4eSJqTZmjqABQsWYIzh0Ucfxd3dnWnTptGxY0dcXP4X9scff8yvv/7KypUr2bVrF5MnT+bll19m3bp1yf7cXbt2MXXqVObNm4fNZkvSeypUqECjRo0oV64cbdu2Zc6cOVy+fBmAa9eucezYMXr16kWOHDnsr3feeYdjx44BEBISQqVKlXB19aFrV+jd25pYuFkza3TrmTMHqV27tsNn1q5dm4MHDzpsK1++vP3n7Nmz4+XlZV9K7eDBg/FW2rhzkMigQYN45513qF27NmPHjmXfvn1Jun4RERFxrjSd1BUrVoxNmzYRGRnJqVOn+O2334iOjqZo0aKA1e/utdde48MPP6RFixaUL1+eAQMG0L59ez744INEz+vr6xtvTVmA/PnzA7BlyxYuXrxIoUKFyJo1K1mzZuXEiRMMGzaMwoULJ3jOLFmysHbtWn766SfKlCnDxx9/TMmSJQkNDSUyMhKAOXPmEBISYn/9/vvv/Prrr4DVrHztmjUYYuFCawDExImwapU1D11S3dn3z2azERsbm+T39+7dm7/++ouuXbuyf/9+qlatyscff5z0AERERMQp0nRSFyd79uz4+flx+fJl1qxZw/PPPw9AdHQ00dHRDjV3YCVYd0tkatas6bCmbJwnnngCgK5du7Jv3z6HBMzf358RI0awZs2aRM9rs9moXbs248ePZ8+ePbi5ubF8+XLy58+Pv78/f/31F8WLF3d4FSlSBGPgn3/Ks2tXCH/+GUbBgrB5M4waBXGXVrp0abZu3erweVu3bqVMmTJJuodx57hzAElcUvlvBQsWpH///ixbtoxhw4YxZ86cJH+GiIiIOEeaXlBqzZo1GGMoWbIkR48eZcSIEZQqVYoePXoA4OXlRf369RkxYgSenp4EBASwadMmvvjiCz788EP7ebp168ajjz7KhAkTABg8eDD169dn8uTJNG/e3D5YoW/fvgDkyZOHPHnyOMTi6uqKr68vJUuWTDDW4OBg1q9fT5MmTciXLx/BwcH8/ffflC5dGrAGMAwaNAhvb2+aNWtGVFQUO3fu5OzZy+zbN5TFizsC7+Lj05JZsybg6+vH0qV78Pf3p2bNmowYMYJ27dpRqVIlGjduzPfff8+yZcvuq5m5f//+TJ48mREjRtC7d2927drFvHnzHI555ZVXePrppylRogSXL19m48aN9msQERGRNMzZnfruZtGiRaZo0aLGzc3N+Pr6mpdfftlcuXLF4Zhz586Z7t27G39/f+Ph4WFKlixpJk+ebB/gYIwx9evXN4GBgQ7vW7x4sSlRooRxc3MzpUuXvmdHy3sNlPjjjz9M06ZNTd68eY27u7spUaKE+fjjjx2OWbhwoalYsaJxc3MzuXPnNpUr1zP58y8zYEzWrMa8/vpx07p1a+Pl5WWyZctmqlataoKDg+3v/+STT0zRokWNq6urKVGihPniiy8czg+Y5cuXO2zz9vY2c+fOtZe///57U7x4cePu7m7q1q1r/vvf/zoMlBgwYIApVqyYcXd3N3nz5jVdu3Y1//zzT6LXLSIi4iwaKOFIa7+S+mvHGQPTp8Pw4XDrFgQEwKJFcMcYBhEREbkLrf3qKE03v6ZnMTGwZQucOwd+flC3rjX44fJl6NULli+3jnvhBfj8c8id27nxioiISPqmpO4hWLYMBg+G06f/t61AAXj5Zfj0Uzh+HNzcYPJka1sSZ00RERERSZSSuhS2bBm0aWM1sf7b6dMwerT1c7FiVnNrlSqpH5+IiIhkTErqUlBMjFVDd7deip6e8Ntv4OOTenGJiIhIxpcu5qlLL7ZscWxyTciNG6BFGkRERCSlKalLQefOpexxIiIiIkmlpC4F+fml7HEiIiIiSaWkLgXVrWuNck1sNKvN
BgULWseJiIiIpCQldSkoSxaYOtX6+c7ELq48ZYp1nIiIiEhKUlKXwlq1gm+/hUcfddxeoIC1vVUr58QlIiIiGZumNHkIWrWC559PeEUJERERkYdBSd1DkiULNGjg7ChEREQks1Dzq4iIiEgGoKROREREJANQUiciIiKSASipExEREckAlNSJiIiIZABK6kREREQyACV1IiIiIhmAkjoRERGRDEBJnYiIiEgGoBUlAGMMABEREU6ORERERJIq7rkd9xzP7JTUAVevXgWgYMGCTo5ERERE7tfVq1fx9vZ2dhhOZzNKb4mNjeXs2bPkzJkTm82WYueNiIigYMGCnDp1Ci8vrxQ7r4iISHrxMJ+FxhiuXr2Kv78/Li7qUaaaOsDFxYUCBQo8tPN7eXkpqRMRkUztYT0LVUP3P0prRURERDIAJXUiIiIiGYCSuofI3d2dsWPH4u7u7uxQREREnELPwtSjgRIiIiIiGYBq6kREREQyACV1IiIiIhmAkjoRERGRDEBJnYiIiEgGkCaSuqCgIGw2G1euXLnrcYULF2bKlCmpElNy2Gw2VqxY4ewwUty8efPIlSuXs8MQEUl3Msrz7WFJ6v1Jj7p3707Lli1T9TNTNKmbNWsWOXPm5Pbt2/ZtkZGRuLq60qBBA4dj436Rx44do1atWpw7d84+K3RKJhHJualpNTlz1pe/ffv2/Pnnn6n6mSIiaUlafL4lR1KTx/tNMtNycjZu3DgqVqyY6p87depU5s2bl6qfmaJJXcOGDYmMjGTnzp32bVu2bMHX15fg4GBu3rxp375x40YKFSpEsWLFcHNzw9fXN0XXXZWU4+npSb58+ZwdhoiI0+j5JvfL29s71RP4FE3qSpYsiZ+fH0FBQfZtQUFBPP/88xQpUoRff/3VYXvDhg3tP8dl+EFBQfTo0YPw8HBsNhs2m41x48bZ33f9+nV69uxJzpw5KVSoELNnz76vGBs0aMCgQYMYOXIkPj4++Pr6Opy/cOHCALzwwgvYbDZ7GeC7776jcuXKeHh4ULRoUcaPH+/wr7Z7iY2NZcKECRQpUgRPT08qVKjAt99+C1iLEjdu3JimTZsSN3VgWFgYBQoUYMyYMRw/ftx+v3Lnzo3NZqN79+73PC/87/6uX7+eqlWrki1bNmrVqsXhw4ftx+zdu5eGDRuSM2dOvLy8qFKliv2PV0L/spw5c6b9D1bJkiVZsGCBw36bzcZnn33GCy+8QLZs2XjsscdYuXJlku+ViEhakhaeb/v37+fJJ5/E09OTPHny0LdvXyIjI+37GzRowCuvvOLwnpYtW9qfFQ0aNODEiRMMGTLE/vlJdbe/6Q/yfEqKK1eu0Lt3b/LmzYuXlxdPPvkke/fuBeDvv//G19eXd9991378tm3bcHNzY/369cybN4/x48ezd+9e+zXH1Z7d7bzwvxq+BQsWULhwYby9venQoQNXr161H/Ptt99Srlw5+++kcePGXLt2DYjfUhgVFcWgQYPIly8fHh4e1KlThx07dtj3J+VZfU8mhXXq1Mk0adLEXn7iiSfMkiVLTP/+/c2YMWOMMcZcv37duLu7m3nz5hljjNm4caMBzOXLl01UVJSZMmWK8fLyMufOnTPnzp0zV69eNcYYExAQYHx8fMyMGTPMkSNHzIQJE4yLi4s5dOhQovEEBgaa559/3l6uX7++8fLyMuPGjTN//vmnmT9/vrHZbObnn382xhhz8eJFA5i5c+eac+fOmYsXLxpjjNm8ebPx8vIy8+bNM8eOHTM///yzKVy4sBk3bpz93IBZvnx5orG88847plSpUmb16tXm2LFjZu7cucbd3d0EBQUZY4w5ffq0yZ07t5kyZYoxxpi2bduaatWqmejoaHP79m2zdOlSA5jDhw+bc+fOmStXriTpvHH3t3r16iYoKMgcOHDA1K1b19SqVcse2+OPP266dOliDh48aP7880+zePFiExISYowxZu7cucbb29t+7LJly4yrq6uZMWOGOXz4sJk8ebLJkiWL2bBhg8O9KFCggPnqq6/MkSNHzKBBg0yOHDnMpUuXEr0/IiJpmTOfb5GRkcbPz8+0atXK7N+/36xfv94UKVLEBAYG2uOpX7++GTx4sEPMzz//vP2YS5cumQIFCpi33nrL/vmJCQgIMB999JG9fLe/6SnxfLp8+XKisTRu3Ni0aNHC7Nixw/z5559m2LBhJk+ePPbnyapVq4yrq6vZsWOHiYiIMEWLFjVDhgyx/z6GDRtmHn/8cfs1X79+PUnnHTt2rMmRI4f9nm/evNn4+vqa1157zRhjzNmzZ03WrFnNhx9+aEJDQ82+ffvMjBkz7L/TO/OPQYMGGX9/f/Pjjz+aAwcOmMDAQJM7d2775yXlWX0vKZ7UzZkzx2TPnt1ER0ebiIgIkzVrVnPx4kXz1VdfmXr16hljjFm/fr0BzIkTJxwuJO6XemcSEScgIMB06dLFXo6NjTX58uUzM2fOTDSehJK6OnXqOBzzxBNPmFGjRtnLCSVnjRo1Mu+++67DtgULFhg/P7+7vi/OzZs3TbZs2cy2bdsctvfq1ct07NjRXl68eLHx8PAwr776qsmePbv5888/7fsS+vIn5bxx71u3bp19/6pVqwxgbty4YYwxJmfOnPY/Qne68/dRq1Yt06dPH4dj2rZta5555hmHe/HGG2/Yy5GRkQYwP/30U4KfISKS1jnz+TZ79myTO3duExkZaT9m1apVxsXFxZw/f94Yc++kLu5z/p2sJSahpO5uf9Mf9PmUWFK3ZcsW4+XlZW7evOmwvVixYubTTz+1l1966SVTokQJ06lTJ1OuXDmH48eOHWsqVKhw3+cdO3asyZYtm4mIiLDvHzFihKlevboxxphdu3YZwBw/fjzB2P+df0RGRhpXV1ezcOFC+/5bt24Zf39/M2nSJId7cbdn9b1kTXqdXtI0aNCAa9eusWPHDi5fvkyJEiXImzcv9evXp0ePHty8eZOgoCCKFi1KoUKF7vv85cuXt/9ss9nw9fXl4sWLyT4HgJ+f3z3PsXfvXrZu3cp//vMf+7aYmBhu3rzJ9evXyZYt213ff/ToUa5fv85TTz3lsP3WrVtUqlTJXm7bti3Lly9n4sSJzJw5k8ceeyxFzguO1+3n5wfAxYsXKVSoEEOHDqV3794sWLCAxo0b07ZtW4oVK5bgZx48eJC+ffs6bKtduzZTp05N9POyZ8+Ol5fXff+uRETSCmc+3w4ePEiFChXInj27/ZjatWsTGxvL4cOHyZ8//4Nf4H3El5S/6ffzfErM3r17iYyMJE+ePA7bb9y4wbFjx+zlDz74gLJly7JkyRJ27dp1z3Vmk3rewoULkzNnTnv53/lChQoVaNSoEeXKlaNp06Y0adKENm3akDt37nifd+zYMaKjo6ldu7Z9m6urK9WqVePgwYMOx97tWX0vKZ7UFS9enAIFCrBx40YuX75M/fr1AfD396dgwYJs27aNjRs38uSTTybr/K6urg5lm81GbGzsQz9HZGQk48ePp1WrVvH2eXh43PMz4/o9rFq1ikcffdRh37+/fNe
vX2fXrl1kyZKFI0eOpNh5wfG64/pSxF33uHHj6NSpE6tWreKnn35i7NixfPPNN7zwwgv3jCExKfG7EhFJK9L6883FxcXeJztOdHR0smJJyP3Gdz/Pp7ud486+jHH+3df72LFjnD17ltjYWI4fP065cuVS5Lx3u+YsWbKwdu1atm3bxs8//8zHH3/M66+/TnBwMEWKFEnS9SXkbs/qe0nxpA6sUUJBQUFcvnyZESNG2LfXq1ePn376id9++40XX3wx0fe7ubkRExPzMEJLEldX13ifX7lyZQ4fPkzx4sWTdc4yZcrg7u7OyZMn7X8IEjJs2DBcXFz46aefeOaZZ2jevLn9D4SbmxuAQ2xJPW9SlChRghIlSjBkyBA6duzI3LlzE0zqSpcuzdatWwkMDLRv27p1K2XKlHmgzxcRSeuc9XwrXbo08+bN49q1a/bauq1bt+Li4kLJkiUByJs3L+fOnbO/JyYmht9//90+iOFBPv9eHtbzqXLlypw/f56sWbM6DFz8t1u3btGlSxfat29PyZIl6d27N/v377fP2pDQNSflvElhs9moXbs2tWvXZsyYMQQEBLB8+XKGDh3qcFzcwMKtW7cSEBAAWAn3jh074g1ueRAPLal7+eWXiY6OdvhF1q9fnwEDBnDr1i2HL9mdChcuTGRkJOvXr6dChQpky5btns2bKalw4cKsX7+e2rVr4+7uTu7cuRkzZgzPPvsshQoVok2bNri4uLB3715+//133nnnnXueM2fOnAwfPpwhQ4YQGxtLnTp1CA8PZ+vWrXh5eREYGMiqVav473//y/bt26lcuTIjRowgMDCQffv2kTt3bgICArDZbPzwww8888wzeHp6Jum893Ljxg1GjBhBmzZtKFKkCKdPn2bHjh20bt06weNHjBhBu3btqFSpEo0bN+b7779n2bJlrFu37r7vtYhIeuKs51vnzp0ZO3YsgYGBjBs3jr///puBAwfStWtXe9Prk08+ydChQ1m1ahXFihXjww8/jDdvXOHChdm8eTMdOnTA3d2dRx55JHk34g4P6/nUuHFjatasScuWLZk0aRIlSpTg7NmzrFq1ihdeeIGqVavy+uuvEx4ezrRp08iRIwc//vgjPXv25IcffrBfc2hoKCEhIRQoUICcOXMm6bz3EhwczPr162nSpAn58uUjODiYv//+m9KlS8c7Nnv27Lz44ouMGDECHx8fChUqxKRJk7h+/Tq9evW6/xuemCT1vLtPoaGhBjClSpVy2H78+HEDmJIlSzpsT6ijZP/+/U2ePHkMYMaOHWuMSbiDZ4UKFez7E5LQQIl7dSRduXKlKV68uMmaNasJCAiwb1+9erWpVauW8fT0NF5eXqZatWpm9uzZ9v3cY/RrbGysmTJliilZsqRxdXU1efPmNU2bNjWbNm0yFy9eNPnz53cYjHHr1i1TpUoV065dO/u2t956y/j6+hqbzWaP+W7nNSbh+7tnzx4DmNDQUBMVFWU6dOhgChYsaNzc3Iy/v78ZMGCAvWNmQh17P/nkE1O0aFHj6upqSpQoYb744guH/QndC29vbzN37txE74+ISFrnzOfbvn37TMOGDY2Hh4fx8fExffr0sY+0NMZ6Zrz44ovGx8fH5MuXz0yYMCHe82379u2mfPnyxt3d3dwtBUhooMS9/qan1PPpThEREWbgwIHG39/fuLq6moIFC5rOnTubkydPmo0bN5qsWbOaLVu22I8PDQ01Xl5e5pNPPjHGWAM2WrdubXLlymWf3eJe5zUm4QEWH330kT0v+OOPP0zTpk1N3rx5jbu7uylRooT5+OOP7cfemX/cuHHDDBw40DzyyCPG3d3d1K5d2/z222/2/fd6VieFzZg7GuBFREREJN1JE2u/ioiIiMiDUVInIiIikgEoqRMRERHJAJTUiYiIiGQASupEREREMgAldSIiIiIZgJI6ERERkQxASZ2IiIhIBqCkTkRERCQDUFInIiIikgEoqRMRERHJAP4PHNZipKLTX6oAAAAASUVORK5CYII=\n" + }, + "metadata": {} + } + ] + }, + { + "cell_type": "code", + "source": [ + "\n", + "data = pd.read_csv('/content/drive/MyDrive/intel hackathon/squad_train.csv',encoding = 'unicode_escape')\n", + "data.drop_duplicates(keep='first')\n", + "data\n", + "df = data[['prompt','response']]" + ], + "metadata": { + "id": "MxI7CfENOsF4" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "df = df.rename(columns={'prompt':'document', 'response':'summary'})\n", + "df = df.dropna()" + ], + "metadata": { + "id": "Ew0OIdz5OykZ" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "df['document']= df['document'].apply(lambda x: x.lower())\n", + "df['summary'] = df['summary'].apply(lambda x: x.lower())" + ], + "metadata": { + "id": "8AeEVWEEO3Z7" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "import pandas as pd\n", + "import modin.pandas as mpd\n", + "import time\n", + "import matplotlib.pyplot as plt\n", + "\n", + "# Load the dataset\n", + "data = pd.read_csv('/content/drive/MyDrive/intel hackathon/squad_train.csv', encoding='unicode_escape')\n", + "data.drop_duplicates(keep='first', inplace=True)\n", + "\n", + "df = data[['prompt', 'response']]\n", + "df = df.rename(columns={'prompt': 'document', 'response': 'summary'})\n", + "df = df.dropna()\n", + "df['document'] = df['document'].apply(lambda x: x.lower())\n", + "df['summary'] = df['summary'].apply(lambda x: x.lower())\n", + 
"\n", + "# Function to measure inference time\n", + "def measure_time(func, *args, num_iterations=10):\n", + " start_time = time.time()\n", + " for _ in range(num_iterations):\n", + " func(*args)\n", + " end_time = time.time()\n", + " return (end_time - start_time) / num_iterations\n", + "\n", + "# Measure inference time for pandas\n", + "pandas_time = measure_time(lambda df: df['document'].apply(lambda x: x.lower()), df)\n", + "\n", + "# Convert the dataframe to modin dataframe\n", + "modin_df = mpd.DataFrame(df)\n", + "\n", + "# Measure inference time for modin.pandas\n", + "modin_time = measure_time(lambda df: df['document'].apply(lambda x: x.lower()), modin_df)\n", + "\n", + "# Visual representation\n", + "labels = ['Pandas', 'Modin']\n", + "times = [pandas_time, modin_time]\n", + "\n", + "plt.bar(labels, times, color=['blue', 'orange'])\n", + "plt.ylabel('Time (seconds)')\n", + "plt.title('Inference Time Comparison: Pandas vs Modin (Lowercasing)')\n", + "plt.show()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 469 + }, + "id": "OAqN--2Aq0Ya", + "outputId": "a9a788ce-3927-4af9-ab85-21f123efb1eb" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "UserWarning: Distributing object. This may take some time.\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAjkAAAGzCAYAAADNKAZOAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAABEDUlEQVR4nO3deVwV9eL/8fcB4QAiqKhgiuJWiBsmSi6opYWFli2K2U3BpcU0k8z0dnPpdsMlzRZvlvemXtNywav9NPe01FxyyzL3XEoDVBQUFYQzvz/8cq5HQOF4FBtfz8fjPB6cz8x85jNzzsx5M/OZGYthGIYAAABMxq2kGwAAAHAzEHIAAIApEXIAAIApEXIAAIApEXIAAIApEXIAAIApEXIAAIApEXIAAIApEXIAAIApEXKK6dy5c+rTp4+CgoJksVj0yiuvlHSTbntr1qyRxWLRmjVrSropuMLIkSNlsVhKuhl/anFxcQoJCSnpZphCQevSYrFo5MiRLp/XI488or59+7q83jtN27Zt1bZt25s6j19++UWlSpXSzz//7NT0d1zImTZtmiwWi7Zs2eLU9O+8846mTZumF198UTNmzNCzzz7r4hb+OcTFxclisVz3FRcXV9JNLVRubq6mTp2qtm3bqnz58rJarQoJCVF8fLzT3w/cXHnBLO/l4+OjsLAw/e1vf1NGRkZJN88U8tZtnz59Chz+xhtv2Mc5efLkLW7djVu/fr2WL1+u119/3V6W94/YvHnzSrBlKEhYWJhiYmI0fPhwp6Yv5eL2mN4333yj++67TyNGjCjpppSo559/Xu3bt7e/P3TokIYPH67nnntOUVFR9vJatWopMjJSFy5ckKenZ0k0tUAXLlzQE088oaVLl6p169b661//qvLly+vw4cOaM2eOpk+frqNHj6pq1aol3dSb5m9/+5uGDh1a0s1wyscffyxfX1+dO3dOy5cv1z/+8Q998803Wr9+PUenXMDLy0tJSUn65z//mW+7/eKLL+Tl5aWLFy/e9HZcuHBBpUq59mdq3LhxateunWrXru3Seu9Ey5cvvyXzeeGFF/TII4/o4MGDqlWrVrGmJeQUU2pqqsLCwlxWn81mU3Z2try8vFxW563QvHlzNW/e3P5+y5YtGj58uJo3b66//OUv+ca/3Zbvtdde09KlS/Xee+/lO+U4YsQIvffeeyXTsFsgMzNTpUuXVqlSpVz+A3KrPPXUU6pQoYKkyzvAJ598UvPnz9fGjRsdvpdwTocOHfTVV19pyZIleuyxx+zl33//vQ4dOqQnn3xSSUlJN70drt5vpKamavHixZo8ebJL6y1pedv0rXar/nFt3769ypUrp+nTp+utt94q1rR33OmqgsTFxcnX11fHjh1T586d5evrq4oVK2rw4MHKzc2V9L/DmYcOHdLixYvth2sPHz4sScrKytKIESNUu3ZtWa1WBQcHa8iQIcrKynKYl8ViUf/+/TVz5kzVq1dPVqtVS5culSQdO3ZMvXr1UmBgoKxWq+rVq6fPPvvMYfq8dsyZM0f/+Mc/VLVqVXl5ealdu3Y6cOBAvmXbtGmTHnnkEZUrV06lS5dWw4YN9f777zuMs2fPHj311FMqX768vLy8FBERoa+++spVq7fAPjlt27ZV/fr1tXPnTrVp00Y+Pj6qXbu2/XDxt99+q8jISHl7e+uee+7RypUr89VblPVVkN9//12ffPKJHnzwwQL7VLm7u2vw4MEOR3G2b9+uhx9+WH5+fvL19VW7du20ceNGh+nyToWuW7dOL7/8sipWrKiyZcvq+eefV3Z2ts6cOaMePXqoXLlyKleunIYMGSLDMOzTHz58WBaLRe+++67ee+89Va9eXd7e3mrTpk2+89E7d+5UXFycatasKS8vLwUFBalXr146deqUw3h5p3d++eUXde/eXeXKlVOrVq0chl1pxYoVatWqlcqWLStfX1/dc889+utf/+owTmpqqnr37q3AwEB5eXmpUaNGmj59usM4Vy7Lp59+qlq1aslqtapp06b64YcfHMa9dOmS9uzZoz/++KOgj6tIHnjgAUmXjyhmZ2dr+PDhatKkifz9/VW6dGlFRUVp9erVTrdRkhYsWKD69evLy8tL9evX13//+98C2/Luu++qRYsWCggIkLe3t5o0aVLgaZCirOur1a9fX/fff3++cpvNpipVquipp56yl3355Zdq0qSJypQpIz8/PzVo0CDftl+YKlWqqHXr1po1a5ZD+cyZM9WgQQPVr1+/wOnmzp2rJk2ayNvbWxUqVNBf/vIXHTt2LN94RV2XV/fJyfvOHjhwQHFxcSpbtqz8/f0VHx+v8+fPX3e5Fi9erJycHIej0MXx66+/qkuXLipfvrx8fHx03333afHixfbhhmGoQoUKSkhIsJfZbDaVLVtW7u7uOnPmjL18zJgxKlWqlM6dO2cvK8q+OG8/8+2336pfv36qVKmSw75qyZIlatOmjf1zb9q0qcPnuHbtWnXp0kXVqlWz/1YNGjRIFy5ccJhPcnKy4uPjVbVqVVmtVlWuXFmPPfaY/TdPyt8np7i/T5MmTVLNmjXl7e2tZs2aae3atQX28/Hw8FDbtm21cOHCgj+Ya/hz/ht3E+Tm5io6OlqRkZF69913tXLlSo0fP161atXSiy++qLp162rGjBkaNGiQqlatqldffVWSVLFiRdlsNj366KNat26dnnvuOdWtW1c//fST3nvvPe3bt08LFixwmNc333yjOXPmqH///qpQoYJCQkKUkpKi++67zx6CKlasqCVLlqh3797KyMjI92M8evRoubm5afDgwUpPT9fYsWP1zDPPaNOmTfZxVqxYoY4dO6py5coaOHCggoKCtHv3bi1atEgDBw6UJO3atUstW7ZUlSpVNHToUJUuXVpz5sxR586dlZSUpMcff/ymrfPTp0+rY8eO6tatm7p06aKPP/5Y3bp108yZM/XKK6/ohRdeUPfu3TVu3Dg99dRT+u2331SmTBlJKvb6utKSJUuUk5NT5P5Uu3btUlRUlPz8/DRkyBB5eHjok08+Udu2be1h7EoDBgxQUFCQRo0apY0bN+rTTz9V2bJl9f3336tatWp655139PXXX2vcuHGqX7++evTo4TD9f/7zH509e1YvvfSSLl68qPfff18PPPCAfvrpJwUGBkq6/Nn++uuvio+PV1BQkHbt2qVPP/1Uu3bt0saNG/OFly5duqhOnTp65513HILV1cvZsWNHNWzYUG+99ZasVqsOHDig9evX28e5cOGC2rZtqwMHDqh///6qUaOG5s6dq7i4OJ05c8b+vcoza9YsnT17Vs8//7wsFovGjh2rJ554Qr/++qs8PDwkXQ6rdevWVc+ePTVt2rQifSZXO3jwoCQpICBAGRkZ+te//qWnn35affv21dmzZ/Xvf/9b0dHR2rx5s8LDw4vdxuXLl+vJJ59UWFiYEhMTderUKfsPwNXef/99Pfroo3rmmWeUnZ2tL7/8Ul26dNGiRYsUExNT5HVdkNjYWI0cOVLJyckKCgqy
l69bt07Hjx9Xt27dJF3+fjz99NNq166dxowZI0navXu31q9fn+8zKkz37t01cOBAnTt3Tr6+vsrJydHcuXOVkJBQ4KmqadOmKT4+Xk2bNlViYqJSUlL0/vvva/369dq+fbvKli1b7HVZmK5du6pGjRpKTEzUtm3b9K9//UuVKlWyL2thvv/+ewUEBKh69epFnleelJQUtWjRQufPn9fLL7+sgIAATZ8+XY8++qjmzZunxx9/XBaLRS1bttR3331nn27nzp1KT0+Xm5ub1q9fb/8OrF27Vo0bN5avr6+k4u+L+/Xrp4oVK2r48OHKzMyUdPkz6NWrl+rVq6dhw4apbNmy2r59u5YuXaru3btLuhxEz58/rxdffFEBAQHavHmzPvzwQ/3++++aO3euvf4nn3xSu3bt0oABAxQSEqLU1FStWLFCR48evW5n+6L8Pn388cfq37+/oqKiNGjQIB0+fFidO3dWuXLlCvwuNGnSRAsXLlRGRob8/PyK/sEZd5ipU6cakowffvjBXtazZ09DkvHWW285jNu4cWOjSZMmDmXVq1c3YmJiHMpmzJhhuLm5GWvXrnUonzx5siHJWL9+vb1MkuHm5mbs2rXLYdzevXsblStXNk6ePOlQ3q1bN8Pf3984f/68YRiGsXr1akOSUbduXSMrK8s+3vvvv29IMn766SfDMAwjJyfHqFGjhlG9enXj9OnTDnXabDb73+3atTMaNGhgXLx40WF4ixYtjDp16hhF9cMPPxiSjKlTp+Ybltfm1atX28vatGljSDJmzZplL9uzZ499/WzcuNFevmzZsnx1F3V9FWTQoEGGJGP79u1FWrbOnTsbnp6exsGDB+1lx48fN8qUKWO0bt3aXpb33YqOjnZYx82bNzcsFovxwgsv2MtycnKMqlWrGm3atLGXHTp0yJBkeHt7G7///ru9fNOmTYYkY9CgQfaygpbviy++MCQZ3333nb1sxIgRhiTj6aefzjd+3rA87733niHJOHHiRKHrYuLEiYYk4/PPP7eXZWdnG82bNzd8fX2NjIwMh2UJCAgw0tLS7OMuXLjQkGT8v//3//Itd8+ePQud79Vt3rt3r3HixAnj0KFDxieffGJYrVYjMDDQyMzMNHJychy2DcMwjNOnTxuBgYFGr1698s23KG0MDw83KleubJw5c8Zetnz5ckOSUb16dYd5Xf3ZZGdnG/Xr1zceeOABe1lR1nVB9u7da0gyPvzwQ4fyfv36Gb6+vvZ5Dxw40PDz8zNycnKKVb9hXN5HvfTSS0ZaWprh6elpzJgxwzAMw1i8eLFhsViMw4cP2z+HvPZnZ2cblSpVMurXr29cuHDBXteiRYsMScbw4cPtZcVZl5KMESNG2N/nzffKz9EwDOPxxx83AgICrrtsrVq1yrdPN4z/7aPmzp1b6LSvvPKKIclhP3/27FmjRo0aRkhIiJGbm2sYhmGMGzfOcHd3t28LH3zwgVG9enWjWbNmxuuvv24YhmHk5uYaZcuWddimi7ovztvPtGrVyuHzPXPmjFGmTBkjMjLS4TPIqydPQfuOxMREw2KxGEeOHDEM4/L2IskYN25coevDMC7vx6/chxX19ykrK8sICAgwmjZtaly6dMk+3rRp0wxJDnXmmTVrliHJ2LRp0zXbdDVOV13hhRdecHgfFRWlX3/99brTzZ07V3Xr1lVoaKhOnjxpf+UdQr/6MHmbNm0c+vUYhqGkpCR16tRJhmE41BEdHa309HRt27bNoY74+HiH86F5nX3z2rt9+3YdOnRIr7zyiv0/qDx5/+WnpaXpm2++UdeuXXX27Fn7PE+dOqXo6Gjt37+/wEPNruLr62v/z1OS7rnnHpUtW1Z169Z1ODqS93fesjmzvq6UdxVO3lGha8nNzdXy5cvVuXNn1axZ015euXJlde/eXevWrct3VU/v3r0djqRERkbKMAz17t3bXubu7q6IiIgCv1+dO3dWlSpV7O+bNWumyMhIff311/Yyb29v+98XL17UyZMndd9990lSgct+9Xe7IHnfk4ULF8pmsxU4ztdff62goCA9/fTT9jIPDw+9/PLLOnfunL799luH8WNjY1WuXDn7+6u/p5IUEhIiwzCKdRTnnnvuUcWKFVWjRg09//zzql27thYvXiwfHx+5u7vbtw2bzaa0tDTl5OQoIiKiwHVzvTb+8ccf2rFjh3r27Cl/f3/7eA8++GCB/fOu/GxOnz6t9PR0RUVFOcy7KOu6IHfffbfCw8M1e/Zse1lubq7mzZunTp062eddtmxZZWZmasWKFUWu+2rlypVThw4d9MUXX0i6fMSrRYsWBR4F2bJli1JTU9WvXz+HfjQxMTEKDQ21n9Ip7rosTEH76lOnTl33CrtTp045fNbF8fXXX6tZs2b2073S5X3Yc889p8OHD+uXX36xtyU3N1fff/+9pMtHbKKiohQVFaW1a9dKkn7++WedOXPG/l1zZl/ct29fubu729+vWLFCZ8+e1dChQ/P1Zbpyf3Tl9zMzM1MnT55UixYtZBiGtm/fbh/H09NTa9as0enTp4u9rq73+7RlyxadOnVKffv2degX+MwzzxT6+eSVF/eKPkLO//Hy8lLFihUdysqVK1ekD3j//v3atWuXKlas6PC6++67JV3uw3ClGjVqOLw/ceKEzpw5o08//TRfHfHx8QXWUa1atXxtlWRvb97h+8LOnUvSgQMHZBiG3nzzzXzzzbt67Or5ulLVqlXznVbx9/dXcHBwvjLpf8vmzPq6Ut6hzrNnz163jSdOnND58+d1zz335BtWt25d2Ww2/fbbbw7lV382ee0vaLkK+n7VqVMnX9ndd9/tcC48LS1NAwcOVGBgoLy9ve0/+JKUnp6eb/qrv3MFiY2NVcuWLdWnTx8FBgaqW7dumjNnjsOP8JEjR1SnTh25uTnuOurWrWsffqXrfU+dlZSUpBUrVmjNmjU6cOCAfv75ZzVp0sQ+fPr06WrYsKG8vLwUEBCgihUravHixQWum+u1MW+ZCvpcCvpeLFq0SPfdd5+8vLxUvnx5VaxYUR9//LHDvIuyrgsTGxur9evX23/01qxZo9TUVMXGxtrH6devn+6++249/PDDqlq1qnr16mXv+1cc3bt3t5+iWLBggf2Ux9Xy1lFB6yM0NNQ+vLjrsjA38r0yCjldez1HjhwpdD+QN1yS7r33Xvn4+NgDTV7Iad26tbZs2aKLFy/ah+UFJmf2xVdv00XZ50vS0aNHFRcXp/Lly9v7n7Zp00bS//YdVqtVY8aM0ZIlSxQYGKjWrVtr7NixSk5OLtK6Kuo2dfUVbqVKlSr0VFje51bcqyfpk/N/rkzExWWz2dSgQQNNmDChwOFX/7hdmaTzppekv/zlL+rZs2eBdTRs2NDhfWHtLc4GnDffwYMHKzo6usBxbuZlloUtw/WWzZn1daXQ0FBJ0k8//ZSvf4YrFGe5nN3hdu3aVd9//71ee+01hYeHy9fXVzabTR06dCjwh/Lq71xBvL299d1332n16tVavHixli5dqtmzZ+uBBx7Q8uXLndpGXPE9LUjr1q3
tV1dd7fPPP1dcXJw6d+6s1157TZUqVZK7u7sSExPtPwQ3q41r167Vo48+qtatW+uf//ynKleuLA8PD02dOtWh8+eNrOvY2FgNGzZMc+fO1SuvvKI5c+bI399fHTp0sI9TqVIl7dixQ8uWLdOSJUu0ZMkSTZ06VT169MjXSfxaHn30UVmtVvXs2VNZWVnq2rVrsdfJzeDsZxYQEHDDAft6PDw8FBkZqe+++04HDhxQcnKyoqKiFBgYqEuXLmnTpk1au3atQkND7f9YO7MvLso2fbXc3Fw9+OCDSktL0+uvv67Q0FCVLl1ax44dU1xcnMO+45VXXlGnTp20YMECLVu2TG+++aYSExP1zTffqHHjxtecz83Y7vM+t8K2+8IQclygVq1a+vHHH9WuXTun7tFRsWJFlSlTRrm5uU73+i+oTdLlw6KF1Zl3+sXDw8Nl870VbnR9Pfzww3J3d9fnn39+3c7HFStWlI+Pj/bu3Ztv2J49e+Tm5pYvxN6o/fv35yvbt2+f/T+c06dPa9WqVRo1apTDDbIKmq643Nzc1K5dO7Vr104TJkzQO++8ozfeeEOrV69W+/btVb16de3cuVM2m83haM6ePXskyakOna42b9481axZU/Pnz3fYHp29t1XeMhW0fq/+XiQlJcnLy0vLli2T1Wq1l0+dOjXftNdb14WpUaOGmjVrptmzZ6t///6aP3++Onfu7DA/6fLlvZ06dVKnTp1ks9nUr18/ffLJJ3rzzTeL/M+Lt7e3OnfurM8//1wPP/xwoT8weeto79699tP0efbu3WsfXpx1eTOEhoY6fel79erVC90P5A3PExUVpTFjxmjlypWqUKGCQkNDZbFYVK9ePa1du1Zr165Vx44d7eO7Yl985T6/sM/3p59+0r59+zR9+nSHCx4KO61Zq1Ytvfrqq3r11Ve1f/9+hYeHa/z48fr888+damOevHV14MABh6sFc3JydPjw4QL/ST106JDc3NzsZ0iKitNVLtC1a1cdO3ZMU6ZMyTfswoUL9p7vhXF3d7ffd6KgW1efOHGi2G269957VaNGDU2cONHhskXpf2m6UqVKatu2rT755JMCL991Zr63wo2ur+DgYPXt21fLly/Xhx9+mG+4zWbT+PHj9fvvv8vd3V0PPfSQFi5c6HC6KCUlRbNmzVKrVq2K19O/CBYsWOBw/n3z5s3atGmTHn74YUn/+y/p6v+KJk6ceEPzTUtLy1eWd6Qr71YIjzzyiJKTkx36hOTk5OjDDz+Ur6+v/bB3cbjiEvIrFbR+Nm3apA0bNjhVX+XKlRUeHq7p06c7nHJasWKFvR/GlfO2WCz2W09Ily9Vv/oKy6Ks62uJjY3Vxo0b9dlnn+nkyZMOp6ok5buVgJubm/2Hoyj1X2nw4MEaMWKE3nzzzULHiYiIUKVKlTR58mSH+pcsWaLdu3fbrygqzrq8GZo3b67Tp08Xqa/l1R555BFt3rzZ4XuUmZmpTz/9VCEhIQ59iqKiopSVlaWJEyeqVatW9rAdFRWlGTNm6Pjx4w43TXXFvvihhx5SmTJllJiYmO/qt7xtoaBtwzCMfLcWOH/+fL46atWqpTJlyhT7+1OQiIgIBQQEaMqUKcrJybGXz5w5s9AjbVu3blW9evUc+nIVBUdyXODZZ5/VnDlz9MILL2j16tVq2bKlcnNztWfPHs2ZM0fLli1TRETENesYPXq0Vq9ercjISPXt21dhYWFKS0vTtm3btHLlygJ3itfi5uamjz/+WJ06dVJ4eLji4+NVuXJl7dmzR7t27dKyZcskXb5PQatWrdSgQQP17dtXNWvWVEpKijZs2KDff/9dP/74o9Pr5Wa60fU1fvx4HTx4UC+//LLmz5+vjh07qly5cjp69Kjmzp2rPXv22DtFv/322/Z7mvTr10+lSpXSJ598oqysLI0dO9bly1a7dm21atVKL774on1HGRAQoCFDhki63Kco7xz5pUuXVKVKFS1fvlyHDh26ofm+9dZb+u677xQTE6Pq1asrNTVV//znP1W1alV734HnnntOn3zyieLi4rR161aFhIRo3rx5Wr9+vSZOnFikztxXc8Ul5Ffq2LGj5s+fr8cff1wxMTE6dOiQJk+erLCwMId7khRHYmKiYmJi1KpVK/Xq1UtpaWn68MMPVa9ePYc6Y2JiNGHCBHXo0EHdu3dXamqqJk2apNq1a2vnzp328Yqyrq+la9euGjx4sAYPHqzy5cvn+++/T58+SktL0wMPPKCqVavqyJEj+vDDDxUeHm7vQ1JUjRo1UqNGja45joeHh8aMGaP4+Hi1adNGTz/9tP0S8pCQEA0aNMg+blHX5c0QExOjUqVKaeXKlXruuefyDU9KSrIfmblSz549NXToUH3xxRd6+OGH9fLLL6t8+fKaPn26Dh06pKSkJIcjm82bN1epUqW0d+9eh/m0bt1aH3/8sSQ5hBzpxvfFfn5+eu+999SnTx81bdrUfl+sH3/8UefPn9f06dMVGhqqWrVqafDgwTp27Jj8/PyUlJSUL1js27dP7dq1U9euXRUWFqZSpUrpv//9r1JSUhwuFnGWp6enRo4cqQEDBuiBBx5Q165ddfjwYU2bNk21atXKd0bk0qVL9vsCFVuxrsUygcIuIS9dunS+ca++xNYwCr6E3DAuX0I5ZswYo169eobVajXKlStnNGnSxBg1apSRnp5uH0//d3lmQVJSUoyXXnrJCA4ONjw8PIygoCCjXbt2xqeffmofp7BLHfMuh736Eu5169YZDz74oFGmTBmjdOnSRsOGDfNdfnrw4EGjR48eRlBQkOHh4WFUqVLF6NixozFv3rwC21kQZy4hr1evXr5xC1u/Ba23oqyva8nJyTH+9a9/GVFRUYa/v7/h4eFhVK9e3YiPj893efm2bduM6Ohow9fX1/Dx8THuv/9+4/vvv3cYp6DvlmEY+S63zXP19y7vMxw3bpwxfvx4Izg42LBarUZUVJTx448/Okz7+++/G48//rhRtmxZw9/f3+jSpYtx/PjxQi+5LehS5au/36tWrTIee+wx46677jI8PT2Nu+66y3j66aeNffv2OUyXkpJixMfHGxUqVDA8PT2NBg0a5Pvcr1yWq13dRmcuIb/Wpdc2m8145513jOrVqxtWq9Vo3LixsWjRIqNnz54OlygXp42GYRhJSUlG3bp1DavVaoSFhRnz58/PV6dhGMa///1vo06dOobVajVCQ0ONqVOnOr2ur6Vly5aGJKNPnz75hs2bN8946KGHjEqVKhmenp5GtWrVjOeff974448/rlvvtfZReQr7HGbPnm00btzYsFqtRvny5Y1nnnnG4XYIeYq6Lov6fc7b9g4dOnTd5Xv00UeNdu3aOZTl7aMKe+VdNn7w4EHjqaeeMsqWLWt4eXkZzZo1MxYtWlTgfJo2bZrvkufff//dkGQEBwcXOE1R9sWF7WfyfPXVV0aLFi0Mb29vw8/Pz2jWrJnxxRdf2If/8ssvRvv27Q1fX1+jQoUKRt++fY0ff/zRYf998uRJ46WXXjJCQ0ON0qVLG/7+/kZkZKQxZ8
4ch3kVdgl5UX+f8i6vt1qtRrNmzYz169cbTZo0MTp06OAw3pIlSwxJxv79+wtc5muxGMYN9gAE4BKHDx9WjRo1NG7cOA0ePLikmwOYUt5ddffs2VPgVV4oOTabTRUrVtQTTzzh0P2jc+fOslgshd4Z+1rokwMAuGNERUXpoYceuimnmlF0Fy9ezNev8D//+Y/S0tIcHuuQd5f+v//9707Nhz45AIA7ypIlS0q6CXe8jRs3atCgQerSpYsCAgK0bds2/fvf/1b9+vXVpUsX+3h169Z16JxcXIQcAABwS4WEhCg4OFgffPCB0tLSVL58efXo0UOjR4926dPN6ZMDAABMiT45AADAlAg5AADAlO64Pjk2m03Hjx9XmTJlnHoEAwAAuPUMw9DZs2d111135XtIcGHuuJBz/Phxlz9rCAAA3Bq//fabqlatWqRx77iQk3fb+d9++83lzxwCAAA3R0ZGhoKDg4v1+Jg7LuTknaLy8/Mj5AAA8CdTnK4mdDwGAACmRMgBAACmRMgBAACmRMgBAACmRMgBAACmRMgBAACmRMgBAACmRMgBAACmRMgBAACmRMgBAACmRMgBAACmRMgBAACmRMgBAACmRMgBAACmVKqkG2A2xXgCPHDHMYySbgGAOwlHcgAAgCkRcgAAgCkRcgAAgCkRcgAAgCkRcgAAgCkRcgAAgCkRcgAAgCkRcgAAgCkRcgAAgCkRcgAAgCkRcgAAgCkRcgAAgCkRcgAAgCkRcgAAgCkRcgAAgCkRcgAAgCkRcgAAgCkRcgAAgCkRcgAAgCkRcgAAgCkRcgAAgCkRcgAAgCkRcgAAgCkRcgAAgCkRcgAAgCkRcgAAgCkRcgAAgCkRcgAAgCkRcgAAgCkRcgAAgCkRcgAAgCkRcgAAgCndFiFn0qRJCgkJkZeXlyIjI7V58+YiTffll1/KYrGoc+fON7eBAADgT6fEQ87s2bOVkJCgESNGaNu2bWrUqJGio6OVmpp6zekOHz6swYMHKyoq6ha1FAAA/JmUeMiZMGGC+vbtq/j4eIWFhWny5Mny8fHRZ599Vug0ubm5euaZZzRq1CjVrFnzFrYWAAD8WZRoyMnOztbWrVvVvn17e5mbm5vat2+vDRs2FDrdW2+9pUqVKql3797XnUdWVpYyMjIcXgAAwPxKNOScPHlSubm5CgwMdCgPDAxUcnJygdOsW7dO//73vzVlypQizSMxMVH+/v72V3Bw8A23GwAA3P5K/HRVcZw9e1bPPvuspkyZogoVKhRpmmHDhik9Pd3++u23325yKwEAwO2gVEnOvEKFCnJ3d1dKSopDeUpKioKCgvKNf/DgQR0+fFidOnWyl9lsNklSqVKltHfvXtWqVcthGqvVKqvVehNaDwAAbmcleiTH09NTTZo00apVq+xlNptNq1atUvPmzfONHxoaqp9++kk7duywvx599FHdf//92rFjB6eiAACAXYkeyZGkhIQE9ezZUxEREWrWrJkmTpyozMxMxcfHS5J69OihKlWqKDExUV5eXqpfv77D9GXLlpWkfOUAAODOVuIhJzY2VidOnNDw4cOVnJys8PBwLV261N4Z+ejRo3Jz+1N1HQIAALcBi2EYRkk34lbKyMiQv7+/0tPT5efn5/L6LRaXVwmYxp21twHgSs78fnOIBAAAmBIhBwAAmBIhBwAAmBIhBwAAmBIhBwAAmBIhBwAAmBIhBwAAmBIhBwAAmBIhBwAAmBIhBwAAmBIhBwAAmBIhBwAAmBIhBwAAmBIhBwAAmBIhBwAAmBIhBwAAmBIhBwAAmBIhBwAAmBIhBwAAmBIhBwAAmBIhBwAAmBIhBwAAmBIhBwAAmBIhBwAAmBIhBwAAmBIhBwAAmBIhBwAAmBIhBwAAmBIhBwAAmBIhBwAAmBIhBwAAmBIhBwAAmBIhBwAAmBIhBwAAmBIhBwAAmBIhBwAAmBIhBwAAmBIhBwAAmBIhBwAAmBIhBwAAmBIhBwAAmBIhBwAAmBIhBwAAmBIhBwAAmBIhBwAAmBIhBwAAmBIhBwAAmBIhBwAAmBIhBwAAmBIhBwAAmBIhBwAAmBIhBwAAmBIhBwAAmBIhBwAAmBIhBwAAmBIhBwAAmBIhBwAAmBIhBwAAmBIhBwAAmBIhBwAAmBIhBwAAmBIhBwAAmBIhBwAAmBIhBwAAmBIhBwAAmBIhBwAAmBIhBwAAmBIhBwAAmNJtEXImTZqkkJAQeXl5KTIyUps3by503Pnz5ysiIkJly5ZV6dKlFR4erhkzZtzC1gIAgD+DEg85s2fPVkJCgkaMGKFt27apUaNGio6OVmpqaoHjly9fXm+88YY2bNignTt3Kj4+XvHx8Vq2bNktbjkAALidWQzDMEqyAZGRkWratKk++ugjSZLNZlNwcLAGDBigoUOHFqmOe++9VzExMfr73/9+3XEzMjLk7++v9PR0+fn53VDbC2KxuLxKwDRKdm8D4M/Mmd/vEj2Sk52dra1bt6p9+/b2Mjc3N7Vv314bNmy47vSGYWjVqlXau3evWrduXeA4WVlZysjIcHgBAADzK9GQc/LkSeXm5iowMNChPDAwUMnJyYVOl56eLl9fX3l6eiomJkYffvihHnzwwQLHTUxMlL+/v/0VHBzs0mUAAAC3p1LOTHTo0CGtXbtWR44c0fnz51WxYkU1btxYzZs3l5eXl6vbmE+ZMmW0Y8cOnTt3TqtWrVJCQoJq1qyptm3b5ht32LBhSkhIsL/PyMgg6AAAcAcoVsiZOXOm3n//fW3ZskWBgYG666675O3trbS0NB08eFBeXl565pln9Prrr6t69erXra9ChQpyd3dXSkqKQ3lKSoqCgoIKnc7NzU21a9eWJIWHh2v37t1KTEwsMORYrVZZrdbiLCYAADCBIp+uaty4sT744APFxcXpyJEj+uOPP7R161atW7dOv/zyizIyMrRw4ULZbDZFRERo7ty5163T09NTTZo00apVq+xlNptNq1atUvPmzYu8EDabTVlZWUUeHwAAmF+Rj+SMHj1a0dHRhQ63Wq1q27at2rZtq3/84x86fPhwkepNSEhQz549FRERoWbNmmnixInKzMxUfHy8JKlHjx6qUqWKEhMTJV3uYxMREaFatWopKytLX3/9tWbMmKGPP/64qIsCAADuAEUOOdcKOFcLCAhQQEBAkcaNjY3ViRMnNHz4cCUnJys8PFxLly61d0Y+evSo3Nz+d8ApMzNT/fr10++//y5vb2+Fhobq888/V2xsbJHbBwAAzM+p++Rs27ZNHh4eatCggSRp4cKFmjp1qsLCwjRy5Eh5enq6vKGuwn1ygJLDfXIAOOuW3Sfn+eef1759+yRJv/76q7p16yYfHx/NnTtXQ4YMcaZKAAAAl3Iq5Ozbt0/h4eGSpLlz56p169aaNWuWpk2bpqSkJFe2DwAAwClOhRzDMGSz2SRJK1eu1COPPCJJCg4O1smTJ13XOgAAACc5FXIiIiL09ttva8aMGfr2228VExMj6fJNAq++ezEAAEBJcCrkTJw4Udu2bVP//v31xhtv2G/MN2/ePLVo0cKlDQQAA
HCGS59CfvHiRbm7u8vDw8NVVbocV1cBJYerqwA4y5nfb6eeXVWYW/HcKgAAgKIocsgpV66cLEU8TJGWluZ0gwAAAFyhyCFn4sSJ9r9PnTqlt99+W9HR0fZnTG3YsEHLli3Tm2++6fJGAgAAFJdTfXKefPJJ3X///erfv79D+UcffaSVK1dqwYIFrmqfy9EnByg59MkB4KxbdsfjZcuWqUOHDvnKO3TooJUrVzpTJQAAgEs5FXICAgK0cOHCfOULFy4s8oM5AQAAbianrq4aNWqU+vTpozVr1igyMlKStGnTJi1dulRTpkxxaQMBAACc4VTIiYuLU926dfXBBx9o/vz5kqS6detq3bp19tADAABQklx6M8A/AzoeAyXnztrbAHClW3ozQJvNpgMHDig1NdX+sM48rVu3drZaAAAAl3Aq5GzcuFHdu3fXkSNHdPWBIIvFotzcXJc0DgAAwFlOhZwXXnhBERERWrx4sSpXrlzkOyEDAADcKk6FnP3792vevHn2p48DAADcbpy6T05kZKQOHDjg6rYAAAC4jFNHcgYMGKBXX31VycnJatCggTw8PByGN2zY0CWNAwAAcJZTl5C7ueU/AGSxWGQYxm3f8ZhLyIGSwyXkAJx1yy4hP3TokDOTAQAA3DJOhZzq1au7uh0AAAAu5fTNAA8ePKiJEydq9+7dkqSwsDANHDhQtWrVclnjAAAAnOXU1VXLli1TWFiYNm/erIYNG6phw4batGmT6tWrpxUrVri6jQAAAMXmVMfjxo0bKzo6WqNHj3YoHzp0qJYvX65t27a5rIGuRsdjoOTQ8RiAs5z5/XbqSM7u3bvVu3fvfOW9evXSL7/84kyVAAAALuVUyKlYsaJ27NiRr3zHjh2qVKnSjbYJAADghjnV8bhv37567rnn9Ouvv6pFixaSpPXr12vMmDFKSEhwaQMBAACc4VSfHMMwNHHiRI0fP17Hjx+XJN1111167bXX9PLLL9/WD+ykTw5QcuiTA8BZzvx+OxVyrnT27FlJUpkyZW6kmluGkAOUHEIOAGfd0jse5+TkqE6dOg7hZv/+/fLw8FBISIgz1QIAALiMUx2P4+Li9P333+cr37Rpk+Li4m60TQAAADfMqZCzfft2tWzZMl/5fffdV+BVVwAAALeaUyHHYrHY++JcKT09/bZ+AjkAALhzOBVyWrdurcTERIdAk5ubq8TERLVq1cpljQMAAHCWUx2Px4wZo9atW+uee+5RVFSUJGnt2rXKyMjQN99849IGAgAAOMOpIzlhYWHauXOnunbtqtTUVJ09e1Y9evTQnj17VL9+fVe3EQAAoNhu+D45fzbcJwcoOXfW3gaAK92yB3RKl09P/eUvf1GLFi107NgxSdKMGTO0bt06Z6sEAABwGadCTlJSkqKjo+Xt7a1t27YpKytL0uWrq9555x2XNhAAAMAZToWct99+W5MnT9aUKVPk4eFhL2/ZsqW2bdvmssYBAAA4y6mQs3fvXrVu3Tpfub+/v86cOXOjbQIAALhhToWcoKAgHThwIF/5unXrVLNmzRtuFAAAwI1yKuT07dtXAwcO1KZNm2SxWHT8+HHNnDlTgwcP1osvvujqNgIAABSbUzcDHDp0qGw2m9q1a6fz58+rdevWslqtGjx4sAYMGODqNgIAABTbDd0nJzs7WwcOHNC5c+cUFhYmX19fV7btpuA+OUDJ4T45AJx1S++TI0menp4KCwtTaGioVq5cqd27d99IdQAAAC7jVMjp2rWrPvroI0nShQsX1LRpU3Xt2lUNGzZUUlKSSxsIAADgDKdCznfffWd/MOd///tf2Ww2nTlzRh988IHefvttlzYQAADAGU6FnPT0dJUvX16StHTpUj355JPy8fFRTEyM9u/f79IGAgAAOMOpkBMcHKwNGzYoMzNTS5cu1UMPPSRJOn36tLy8vFzaQAAAAGc4dQn5K6+8omeeeUa+vr6qXr262rZtK+nyaawGDRq4sn0AAABOcSrk9OvXT5GRkTp69KgefPBBubldPiBUs2ZN+uQAML9Z3CsCuKbut8f9IpwKOZLUpEkTNWnSxKEsJibmhhsEAADgCkXukzN69GhduHChSONu2rRJixcvdrpRAAAAN6rIIeeXX35RtWrV1K9fPy1ZskQnTpywD8vJydHOnTv1z3/+Uy1atFBsbKzKlClzUxoMAABQFEU+XfWf//xHP/74oz766CN1795dGRkZcnd3l9Vq1fnz5yVJjRs3Vp8+fRQXF8dVVgAAoEQ59ewqm82mnTt36siRI7pw4YIqVKig8PBwVahQ4Wa00aV4dhVQckzz7Co6HgPXdhM6Hjvz++1Ux2M3NzeFh4crPDzcmckBAABuuht6QCcAAMDtipADAABMiZADAABMiZADAABM6YZCzoEDB7Rs2TL7TQKduFALAADgpnAq5Jw6dUrt27fX3XffrUceeUR//PGHJKl379569dVXXdpAAAAAZzgVcgYNGqRSpUrp6NGj8vHxsZfHxsZq6dKlLmscAACAs5wKOcuXL9eYMWNUtWpVh/I6deroyJEjxa5v0qRJCgkJkZeXlyIjI7V58+ZCx50yZYqioqJUrlw5lStXTu3bt7/m+AAA4M7kVMjJzMx0OIKTJy0tTVartVh1zZ49WwkJCRoxYoS2bdumRo0aKTo6WqmpqQWOv2bNGj399NNavXq1NmzYoODgYD300EM6duyYM4sCAABMyqmQExUVpf/85z/29xaLRTabTWPHjtX9999frLomTJigvn37Kj4+XmFhYZo8ebJ8fHz02WefFTj+zJkz1a9fP4WHhys0NFT/+te/ZLPZtGrVKmcWBQAAmJRTj3UYO3as2rVrpy1btig7O1tDhgzRrl27lJaWpvXr1xe5nuzsbG3dulXDhg2zl7m5ual9+/basGFDkeo4f/68Ll26pPLlyxc4PCsrS1lZWfb3GRkZRW4fAAD483LqSE79+vW1b98+tWrVSo899pgyMzP1xBNPaPv27apVq1aR6zl58qRyc3MVGBjoUB4YGKjk5OQi1fH666/rrrvuUvv27QscnpiYKH9/f/srODi4yO0DAAB/Xk4dyZEkf39/vfHGG65sS7GNHj1aX375pdasWSMvL68Cxxk2bJgSEhLs7zMyMgg6AADcAZwOORcvXtTOnTuVmpoqm83mMOzRRx8tUh0VKlSQu7u7UlJSHMpTUlIUFBR0zWnfffddjR49WitXrlTDhg0LHc9qtRa7MzQAAPjzcyrkLF26VD169NDJkyfzDbNYLMrNzS1SPZ6enmrSpIlWrVqlzp07S5K9E3H//v0LnW7s2LH6xz/+oWXLlikiIsKZRQAAACbnVJ+cAQMGqEuXLvrjjz9ks9kcXkUNOHkSEhI0ZcoUTZ8+Xbt379aLL76ozMxMxcfHS5J69Ojh0DF5zJgxevPNN/XZZ58pJCREycnJSk5O1rlz55xZFAAAYFJOHclJSUlRQkJCvg7DzoiNjdWJEyc0fPhwJScnKzw8XEuXLrXXffToUbm5/S+Lffzxx8rOztZTTz3lUM+IESM0cuTIG24P
AAAwB4vhxFM1e/XqpZYtW6p37943o003VUZGhvz9/ZWeni4/Pz+X12+xuLxKwDRM8wzfWWzowDV1d/3G7szvt1NHcj766CN16dJFa9euVYMGDeTh4eEw/OWXX3amWgAAAJdxKuR88cUXWr58uby8vLRmzRpZrjh8YbFYCDkAAKDEORVy3njjDY0aNUpDhw516C8DAABwu3AqoWRnZys2NpaAAwAAbltOpZSePXtq9uzZrm4LAACAyzh1uio3N1djx47VsmXL1LBhw3wdjydMmOCSxgEAADjLqZDz008/qXHjxpKkn3/+2WGYhWuoAQDAbcCpkLN69WpXtwMAAMCl6DkMAABMqchHcp544glNmzZNfn5+euKJJ6457vz582+4YQAAADeiyCHH39/f3t/G39//pjUIAADAFYr17Kq33npLgwcPlo+Pz81s003Fs6uAksOzq4A7xG3y7Kpi9ckZNWqUzp0751TjAAAAbqVihRwnHlgOAABQIop9dRX3wQEAAH8Gxb5Pzt13333doJOWluZ0gwAAAFyh2CFn1KhRXF0FAABue8UOOd26dVOlSpVuRlsAAABcplh9cuiPAwAA/iy4ugoAAJhSsU5X2Wy2m9UOAAAAl+IBnQAAwJQIOQAAwJQIOQAAwJQIOQAAwJQIOQAAwJQIOQAAwJQIOQAAwJQIOQAAwJQIOQAAwJQIOQAAwJQIOQAAwJQIOQAAwJQIOQAAwJQIOQAAwJQIOQAAwJQIOQAAwJQIOQAAwJQIOQAAwJQIOQAAwJQIOQAAwJQIOQAAwJQIOQAAwJQIOQAAwJQIOQAAwJQIOQAAwJQIOQAAwJQIOQAAwJQIOQAAwJQIOQAAwJQIOQAAwJQIOQAAwJQIOQAAwJQIOQAAwJQIOQAAwJQIOQAAwJQIOQAAwJQIOQAAwJQIOQAAwJQIOQAAwJQIOQAAwJQIOQAAwJQIOQAAwJQIOQAAwJQIOQAAwJQIOQAAwJQIOQAAwJQIOQAAwJRKPORMmjRJISEh8vLyUmRkpDZv3lzouLt27dKTTz6pkJAQWSwWTZw48dY1FAAA/KmUaMiZPXu2EhISNGLECG3btk2NGjVSdHS0UlNTCxz//PnzqlmzpkaPHq2goKBb3FoAAPBnUqIhZ8KECerbt6/i4+MVFhamyZMny8fHR5999lmB4zdt2lTjxo1Tt27dZLVaizSPrKwsZWRkOLwAAID5lVjIyc7O1tatW9W+ffv/NcbNTe3bt9eGDRtcNp/ExET5+/vbX8HBwS6rGwAA3L5KLOScPHlSubm5CgwMdCgPDAxUcnKyy+YzbNgwpaen21+//faby+oGAAC3r1Il3YCbzWq1FvnUFgAAMI8SO5JToUIFubu7KyUlxaE8JSWFTsUAAOCGlVjI8fT0VJMmTbRq1Sp7mc1m06pVq9S8efOSahYAADCJEj1dlZCQoJ49eyoiIkLNmjXTxIkTlZmZqfj4eElSjx49VKVKFSUmJkq63Fn5l19+sf997Ngx7dixQ76+vqpdu3aJLQcAALj9lGjIiY2N1YkTJzR8+HAlJycrPDxcS5cutXdGPnr0qNzc/new6fjx42rcuLH9/bvvvqt3331Xbdq00Zo1a2518wEAwG3MYhiGUdKNuJUyMjLk7++v9PR0+fn5ubx+i8XlVQKmYZq9zSw2dOCaurt+Y3fm97vEH+sAAABwMxByAACAKRFyAACAKRFyAACAKRFyAACAKRFyAACAKRFyAACAKRFyAACAKRFyAACAKRFyAACAKRFyAACAKRFyAACAKRFyAACAKRFyAACAKRFyAACAKRFyAACAKRFyAACAKRFyAACAKRFyAACAKRFyAACAKRFyAACAKRFyAACAKRFyAACAKRFyAACAKRFyAACAKRFyAACAKRFyAACAKRFyAACAKRFyAACAKRFyAACAKRFyAACAKRFyAACAKRFyAACAKRFyAACAKRFyAACAKRFyAACAKRFyAACAKRFyAACAKRFyAACAKRFyAACAKRFyAACAKRFyAACAKRFyAACAKRFyAACAKRFyAACAKRFyAACAKRFyAACAKRFyAACAKRFyAACAKRFyAACAKRFyAACAKRFyAACAKRFyAACAKRFyAACAKRFyAACAKRFyAACAKRFyAACAKRFyAACAKRFyAACAKRFyAACAKRFyAACAKRFyAACAKRFyAACAKRFyAACAKRFyAACAKRFyAACAKRFyAACAKd0WIWfSpEkKCQmRl5eXIiMjtXnz5muOP3fuXIWGhsrLy0sNGjTQ119/fYtaCgAA/ixKPOTMnj1bCQkJGjFihLZt26ZGjRopOjpaqampBY7//fff6+mnn1bv3r21fft2de7cWZ07d9bPP/98i1sOAABuZxbDMIySbEBkZKSaNm2qjz76SJJks9kUHBysAQMGaOjQofnGj42NVWZmphYtWmQvu++++xQeHq7Jkydfd34ZGRny9/dXenq6/Pz8XLcg/8dicXmVgGmU7N7GhWaxoQPX1N31G7szv9+lXN6KYsjOztbWrVs1bNgwe5mbm5vat2+vDRs2FDjNhg0blJCQ4FAWHR2tBQsWFDh+VlaWsrKy7O/T09MlXV5ZAG4t02x250u6AcBt7iZs7Hm/28U5NlOiIefkyZPKzc1VYGCgQ3lgYKD27NlT4DTJyckFjp+cnFzg+ImJiRo1alS+8uDgYCdbDcBZ/v4l3QIAt0Tfm7exnz17Vv5F3JmUaMi5FYYNG+Zw5MdmsyktLU0BAQGycG7J1DIyMhQcHKzffvvtppyaBHB7YFu/MxiGobNnz+quu+4q8jQlGnIqVKggd3d3paSkOJSnpKQoKCiowGmCgoKKNb7VapXVanUoK1u2rPONxp+On58fOz7gDsC2bn5FPYKTp0SvrvL09FSTJk20atUqe5nNZtOqVavUvHnzAqdp3ry5w/iStGLFikLHBwAAd6YSP12VkJCgnj17KiIiQs2aNdPEiROVmZmp+Ph4SVKPHj1UpUoVJSYmSpIGDhyoNm3aaPz48YqJidGXX36pLVu26NNPPy3JxQAAALeZEg85sbGxOnHihIYPH67k5GSFh4dr6dKl9s7FR48elZvb/w44tWjRQrNmzdLf/vY3/fWvf1WdOnW0YMEC1a9fv6QWAbcpq9WqESNG5DtdCcBc2NZRmBK/Tw4AAMDNUOJ3PAYAALgZCDkAAMCUCDkAAMCUCDkAAMCUCDm4I8XFxalz584l3QwAN8maNWtksVh05swZSdK0adO4EewdiJCDEhcXFyeLxSKLxSJPT0/Vrl1bb731lnJyckq6aQBukrzt/oUXXsg37KWXXpLFYlFcXJzL5hcbG6t9+/a5rD78ORBycFvo0KGD/vjjD+3fv1+vvvqqRo4cqXHjxpV0swDcRMHBwfryyy914cIFe9nFixc1a9YsVatWzaXz8vb2VqVKlVxaJ25/hBzcFqxWq4KCglS9enW9+OKLat++vb766itNmDBBDRo0UOnSpRUcHKx+/frp3Llz9unyDkEvW7ZMdevWla+vrz0w5cnNzVVCQoLKli2rgIAADRk
yRFffHmrp0qVq1aqVfZyOHTvq4MGD9uHZ2dnq37+/KleuLC8vL1WvXt1+F24Azrn33nsVHBys+fPn28vmz5+vatWqqXHjxvayrKwsvfzyy6pUqZK8vLzUqlUr/fDDDw51ff3117r77rvl7e2t+++/X4cPH3YYfvXpqpEjRyo8PFwzZsxQSEiI/P391a1bN509e/amLCtKBiEHtyVvb29lZ2fLzc1NH3zwgXbt2qXp06frm2++0ZAhQxzGPX/+vN59913NmDFD3333nY4eParBgwfbh48fP17Tpk3TZ599pnXr1iktLU3//e9/HerIzMxUQkKCtmzZolWrVsnNzU2PP/64bDabJOmDDz7QV199pTlz5mjv3r2aOXOmQkJCbvp6AMyuV69emjp1qv39Z599Zn+sT54hQ4YoKSlJ06dP17Zt21S7dm1FR0crLS1NkvTbb7/piSeeUKdOnbRjxw716dNHQ4cOve68Dx48qAULFmjRokVatGiRvv32W40ePdq1C4iSZQAlrGfPnsZjjz1mGIZh2Gw2Y8WKFYbVajUGDx6cb9y5c+caAQEB9vdTp041JBkHDhywl02aNMkIDAy0v69cubIxduxY+/tLly4ZVatWtc+zICdOnDAkGT/99JNhGIYxYMAA44EHHjBsNpuziwngCnnbfWpqqmG1Wo3Dhw8bhw8fNry8vIwTJ04Yjz32mNGzZ0/j3LlzhoeHhzFz5kz7tNnZ2cZdd91l366HDRtmhIWFOdT/+uuvG5KM06dPG4ZxeV/h7+9vHz5ixAjDx8fHyMjIsJe99tprRmRk5M1baNxyJf7sKkCSFi1aJF9fX126dEk2m03du3fXyJEjtXLlSiUmJmrPnj3KyMhQTk6OLl68qPPnz8vHx0eS5OPjo1q1atnrqly5slJTUyVJ6enp+uOPPxQZGWkfXqpUKUVERDicstq/f7+GDx+uTZs26eTJk/YjOEePHlX9+vUVFxenBx98UPfcc486dOigjh076qGHHroVqwYwtYoVKyomJkbTpk2TYRiKiYlRhQoV7MMPHjyoS5cuqWXLlvYyDw8PNWvWTLt375Yk7d6922Ebl6TmzZtfd94hISEqU6aM/f2V+w6YA6ercFu4//77tWPHDu3fv18XLlzQ9OnTdeLECXXs2FENGzZUUlKStm7dqkmTJkm63Ecmj4eHh0NdFoslX5+b6+nUqZPS0tI0ZcoUbdq0SZs2bXKYz7333qtDhw7p73//uy5cuKCuXbvqqaeeupFFBvB/evXqpWnTpmn69Onq1avXLZtvQfuOvH9wYA6EHNwWSpcurdq1a6tatWoqVeryAcatW7fKZrNp/Pjxuu+++3T33Xfr+PHjxarX399flStXtocWScrJydHWrVvt70+dOqW9e/fqb3/7m9q1a6e6devq9OnT+ery8/NTbGyspkyZotmzZyspKcneJwCA8zp06KDs7GxdunRJ0dHRDsNq1aolT09PrV+/3l526dIl/fDDDwoLC5Mk1a1bV5s3b3aYbuPGjTe/4bjtcboKt63atWvr0qVL+vDDD9WpUyetX79ekydPLnY9AwcO1OjRo1WnTh2FhoZqwoQJ9huESVK5cuUUEBCgTz/9VJUrV9bRo0fzdVqcMGGCKleurMaNG8vNzU1z585VUFAQNxcDXMDd3d1+6snd3d1hWOnSpfXiiy/qtddeU/ny5VWtWjWNHTtW58+fV+/evSVJL7zwgsaPH6/XXntNffr00datWzVt2rRbvRi4DXEkB7etRo0aacKECRozZozq16+vmTNnOnXZ9quvvqpnn31WPXv2VPPmzVWmTBk9/vjj9uFubm768ssvtXXrVtWvX1+DBg3Kd4+eMmXKaOzYsYqIiFDTpk11+PBhff3113JzYxMCXMHPz09+fn4FDhs9erSefPJJPfvss7r33nt14MABLVu2TOXKlZMkVatWTUlJSVqwYIEaNWqkyZMn65133rmVzcdtymIUt/MCAADAnwD/hgIAAFMi5AAAAFMi5AAAAFMi5AAAAFMi5AAAAFMi5AAAAFMi5AAAAFMi5AAAAFMi5AAAAFMi5AAAAFMi5AAAAFP6/6xtAY45mVR3AAAAAElFTkSuQmCC\n" + }, + "metadata": {} + } + ] + }, + { + "cell_type": "code", + "source": [ + "import pandas as pd\n", + "import modin.pandas as mpd\n", + "import time\n", + "import matplotlib.pyplot as plt\n", + "\n", + "# Load the dataset\n", + "data = pd.read_csv('/content/drive/MyDrive/intel hackathon/squad_train.csv', encoding='unicode_escape')\n", + "data.drop_duplicates(keep='first', inplace=True)\n", + "\n", + "# Function to preprocess the entire dataset (all columns)\n", + "def preprocess_df(df):\n", + " df['document'] = df['document'].apply(lambda x: x.lower())\n", + " df['summary'] = df['summary'].apply(lambda x: x.lower())\n", + " return df\n", + "\n", + "# Measure inference time for pandas\n", + "pandas_time = measure_time(preprocess_df, data)\n", + "\n", + "# Convert the dataframe to modin dataframe\n", + "modin_df = mpd.DataFrame(data)\n", + "\n", + "# Measure inference time for modin.pandas\n", + "modin_time = measure_time(preprocess_df, modin_df)\n", + "\n", + "# Get the number of rows in the dataset\n", + "num_rows = len(data)\n", + "\n", + "# Visual representation\n", + "labels = ['Pandas', 'Modin']\n", + "times = [pandas_time, modin_time]\n", + "\n", + "plt.bar(labels, times, color=['blue', 'orange'])\n", + "plt.ylabel('Time (seconds)')\n", + "plt.title(f'Inference Time Comparison: Pandas vs Modin (Lowercasing) for {num_rows} Rows')\n", + "plt.show()\n" + ], + "metadata": { + "id": "a1_K3fsxrpx6" + }, + 
"execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "import pandas as pd\n", + "import modin.pandas as mpd\n", + "import time\n", + "import matplotlib.pyplot as plt\n", + "\n", + "# Load the dataset\n", + "data = pd.read_csv('/content/drive/MyDrive/intel hackathon/squad_train.csv', encoding='unicode_escape')\n", + "data.drop_duplicates(keep='first', inplace=True)\n", + "\n", + "df = data[['prompt', 'response']]\n", + "df = df.rename(columns={'prompt': 'document', 'response': 'summary'})\n", + "df = df.dropna()\n", + "df['document'] = df['document'].apply(lambda x: x.lower())\n", + "df['summary'] = df['summary'].apply(lambda x: x.lower())\n", + "\n", + "# Function to measure inference time\n", + "def measure_time(func, *args, num_iterations=10):\n", + " start_time = time.time()\n", + " for _ in range(num_iterations):\n", + " func(*args)\n", + " end_time = time.time()\n", + " return (end_time - start_time) / num_iterations\n", + "\n", + "# Measure inference time for pandas\n", + "pandas_time = measure_time(lambda df: df['document'].apply(lambda x: x.lower()), df)\n", + "\n", + "# Convert the dataframe to modin dataframe\n", + "modin_df = mpd.DataFrame(df)\n", + "\n", + "# Measure inference time for modin.pandas\n", + "modin_time = measure_time(lambda df: df['document'].apply(lambda x: x.lower()), modin_df)\n", + "\n", + "# Get the number of rows in the dataset\n", + "num_rows = len(df)\n", + "\n", + "# Visual representation\n", + "labels = ['Pandas', 'Modin']\n", + "times = [pandas_time, modin_time]\n", + "\n", + "plt.bar(labels, times, color=['blue', 'orange'])\n", + "plt.ylabel('Time (seconds)')\n", + "plt.title(f'Inference Time Comparison: Pandas vs Modin (Lowercasing)\\nNumber of Rows: {num_rows}')\n", + "plt.show()\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 490 + }, + "id": "jv9hCv3srKpp", + "outputId": "e2ec8900-79ba-42e8-86e2-333031cdd324" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "UserWarning: Distributing object. This may take some time.\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAkEAAAHICAYAAABJUz4gAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAABYVklEQVR4nO3de1yO9+M/8Ndd6i7SOR1IUc4iE8kqRpSzOYUZ5TR8HNMctgmbLcctG2NsoxnmkLGvQw6NiZE5bOYsyrmSVFRK3e/fH35dc+uOulU3rtfz8bgf3O/rfb2v93Xdp1fX9b6uSyGEECAiIiKSGT1dd4CIiIhIFxiCiIiISJYYgoiIiEiWGIKIiIhIlhiCiIiISJYYgoiIiEiWGIKIiIhIlhiCiIiISJYYgoiIiEiWGILKwcOHDzF8+HDY2dlBoVBg4sSJuu7SK+/AgQNQKBQ4cOCArrtCT5k1axYUCoWuu/FaCwoKgrOzs6678UbQtC0VCgVmzZpV5svq3LkzRowYUebtyk3btm3Rtm3bcl3GuXPnUKlSJZw5c6bU8zIEabB69WooFAocP35cq/m/+OILrF69GqNHj8aaNWvw/vvvl3EPXw9BQUFQKBQvfAQFBem6q8UqKCjAqlWr0LZtW1haWkKpVMLZ2RnBwcFavz+ofBUGt8JH5cqV0bBhQ3zyySfIzMzUdffeCIXbdvjw4Rqnf/zxx1Kd1NTUCu7dyzt8+DD27NmDqVOnSmWFf6ht3rxZhz0jTRo2bIguXbogLCys1PNWKof+yN7vv/+OVq1aYebMmbruik598MEH8PPzk54nJCQgLCwMI0eOhI+Pj1Tu4uICT09P5OTkwNDQUBdd1SgnJwe9evVCdHQ0fH198dFHH8HS0hKJiYnYuHEjIiMjcf36ddSoUUPXXS03n3zyCaZNm6brbmhl2bJlMDExwcOHD7Fnzx58/vnn+P3333H48GHu3SoDRkZGiIqKwrffflvkc7t+/XoYGRnh0aNH5d6PnJwcVKpUtj9lCxYsQPv27eHq6lqm7crRnj17KmQ5o0aNQufOnXHlyhW4uLiUeD6GoHKQkpKChg0blll7KpUKeXl5MDIyKrM2K4KXlxe8vLyk58ePH0dYWBi8vLwwaNCgIvVftfX78MMPER0dja+++qrIIc2ZM2fiq6++0k3HKkBWVhaqVKmCSpUqlfkPTEXp06cPrK2tATz5guzduze2bNmCo0ePqr0vSTsBAQH47bffsGvXLvTo0UMq//PPP5GQkIDevXsjKiqq3PtR1t8bKSkp2LFjB5YvX16m7epa4We6olXUH7Z+fn6wsLBAZGQkPv300xLPx8NhJRQUFAQTExPcunULPXv2hImJCWxsbBAaGoqCggIA/+0uTUhIwI4dO6TdwYmJiQCA3NxczJw5E66urlAqlXB0dMSUKVOQm5urtiyFQoGxY8di7dq1aNSoEZRKJaKjowEAt27dwtChQ2FrawulUolGjRrhxx9/VJu/sB8bN27E559/jho1asDIyAjt27dHfHx8kXWLi4tD586dYWFhgSpVqqBJkyZYvHixWp0LFy6gT58+sLS0hJGRETw8PPDbb7+V1ebVOCaobdu2aNy4MU6fPo02bdqgcuXKcHV1lXZH//HHH/D09ISxsTHq1auHffv2FWm3JNtLk5s3b+K7775Dhw4dNI7p0tfXR2hoqNpeoFOnTqFTp04wNTWFiYkJ2rdvj6NHj6rNV3io9dChQxg/fjxsbGxgbm6ODz74AHl5eUhPT8fgwYNhYWEBCwsLTJkyBUIIaf7ExEQoFAosXLgQX331FZycnGBsbIw2bdoUOR5++vRpBAUFoXbt2jAyMoKdnR2GDh2Ke/fuqdUrPHx07tw5DBw4EBYWFvD29lab9rS9e/fC29sb5ubmMDExQb169fDRRx+p1UlJScGwYcNga2sLIyMjNG3aFJGRkWp1nl6XFStWwMXFBUqlEi1atMBff/2lVvfx48e4cOEC7ty5o+nlKpF27doBeLJHMi8vD2FhYWjevDnMzMxQpUoV+Pj4YP/+/Vr3EQC2bt2Kxo0bw8jICI0bN8avv/6qsS8LFy5E69atYWVlBWNjYzRv3lzjYZaSbOtnNW7cGO+8806RcpVKherVq6NPnz5S2S+//ILmzZujatWqMDU1hZubW5HPfnGqV68OX19frFu3Tq187dq1cHNzQ+PGjTXOt2nTJjRv3hzGxsawtrbGoEGDcOvWrSL1Srotnx0TVPiejY+PR1BQEMzNzWFmZobg4GBkZ2e/cL127NiB/Px8tb3YpXH16lX07dsXlpaWqFy5Mlq1aoUdO3ZI04UQsLa2RkhIiFSmUqlgbm4OfX19pKenS+Xz5s1DpUqV8PDhQ6msJN/Fhd8zf/zxB8aMGYNq1aqpfVft2rULbdq0kV73Fi1aqL2OsbGx6Nu3L2rWrCn9Vk2aNAk5OTlqy0lKSkJwcDBq1KgBpVIJe3t79OjRQ/rNA4qOCSrt79PSpUtRu3ZtGBsbo2XLloiNjdU4zsjAwABt27bFtm3bNL8wxXg9/8TTkYKCAvj7+8PT0xMLFy7Evn37sGjRIri4uGD06NFo0KAB1qxZg0mTJqFGjRqYPHkyAMDGxgYqlQrdu3fHoUOHMHLkSDRo0AD//vsvvvrqK1y6dAlbt25VW9bvv/+OjRs3YuzYsbC2toazszOSk5PRqlUrKSTZ2Nhg165dGDZsGDIzM4v8WM+dOxd6enoIDQ1FRkYG5s+fj/feew9xcXFSnb1796Jr166wt7fHhAkTYGdnh/Pnz2P79u2YMGECAODs2bN4++23Ub16dUybNg1VqlTBxo0b0bNnT0RFReHdd98tt21+//59dO3aFf3790ffvn2xbNky9O/fH2vXrsXEiRMxatQoDBw4EAsWLECfPn1w48YNVK1aFQBKvb2etmvXLuTn55d4PNfZs2fh4+MDU1NTTJkyBQYGBvjuu+/Qtm1bKaw9bdy4cbCzs8Ps2bNx9OhRrFixAubm5vjzzz9Rs2ZNfPHFF9i5cycWLFiAxo0bY/DgwWrz//TTT3jw4AH+97//4dGjR1i8eDHatWuHf//9F7a2tgCevLZXr15FcHAw7OzscPbsWaxYsQJnz57F0aNHi4Sbvn37ok6dOvjiiy/Ugtez69m1a1c0adIEn376KZRKJeLj43H48GGpTk5ODtq2bYv4+HiMHTsWtWrVwqZNmxAUFIT09HTpfVVo3bp1ePDgAT744AMoFArMnz8fvXr1wtWrV2FgYADgSZht0KABhgwZgtWrV5foNXnWlStXAABWVlbIzMzE999/jwEDBmDEiBF48OABfvjhB/j7++PYsWNwd3cvdR/37NmD3r17o2HDhggPD8e9e/ekH4hnLV68GN27d8d7772HvLw8/PLLL+jbty+2b9+OLl26lHhbaxIYGIhZs2YhKSkJdnZ2UvmhQ4dw+/Zt9O/fH8CT98eAAQPQvn17zJs3DwBw/vx5HD58uMhrVJyBAwdiwoQJePjwIUxMTJCfn49NmzYh
JCRE46Gw1atXIzg4GC1atEB4eDiSk5OxePFiHD58GKdOnYK5uXmpt2Vx+vXrh1q1aiE8PBwnT57E999/j2rVqknrWpw///wTVlZWcHJyKvGyCiUnJ6N169bIzs7G+PHjYWVlhcjISHTv3h2bN2/Gu+++C4VCgbfffhsHDx6U5jt9+jQyMjKgp6eHw4cPS++B2NhYNGvWDCYmJgBK/108ZswY2NjYICwsDFlZWQCevAZDhw5Fo0aNMH36dJibm+PUqVOIjo7GwIEDATwJqtnZ2Rg9ejSsrKxw7NgxfPPNN7h58yY2bdoktd+7d2+cPXsW48aNg7OzM1JSUrB3715cv379hScDlOT3admyZRg7dix8fHwwadIkJCYmomfPnrCwsND4XmjevDm2bduGzMxMmJqaluxFE1TEqlWrBADx119/SWVDhgwRAMSnn36qVrdZs2aiefPmamVOTk6iS5cuamVr1qwRenp6IjY2Vq18+fLlAoA4fPiwVAZA6OnpibNnz6rVHTZsmLC3txepqalq5f379xdmZmYiOztbCCHE/v37BQDRoEEDkZubK9VbvHixACD+/fdfIYQQ+fn5olatWsLJyUncv39frU2VSiX9v3379sLNzU08evRIbXrr1q1FnTp1REn99ddfAoBYtWpVkWmFfd6/f79U1qZNGwFArFu3Tiq7cOGCtH2OHj0qle/evbtI2yXdXppMmjRJABCnTp0q0br17NlTGBoaiitXrkhlt2/fFlWrVhW+vr5SWeF7y9/fX20be3l5CYVCIUaNGiWV5efnixo1aog2bdpIZQkJCQKAMDY2Fjdv3pTK4+LiBAAxadIkqUzT+q1fv14AEAcPHpTKZs6cKQCIAQMGFKlfOK3QV199JQCIu3fvFrstIiIiBADx888/S2V5eXnCy8tLmJiYiMzMTLV1sbKyEmlpaVLdbdu2CQDi//7v/4qs95AhQ4pd7rN9vnjxorh7965ISEgQ3333nVAqlcLW1lZkZWWJ/Px8tc+GEELcv39f2NraiqFDhxZZbkn66O7uLuzt7UV6erpUtmfPHgFAODk5qS3r2dcmLy9PNG7cWLRr104qK8m21uTixYsCgPjmm2/UyseMGSNMTEykZU+YMEGYmpqK/Pz8UrUvxJPvqP/9738iLS1NGBoaijVr1gghhNixY4dQKBQiMTFReh0K+5+XlyeqVasmGjduLHJycqS2tm/fLgCIsLAwqaw02xKAmDlzpvS8cLlPv45CCPHuu+8KKyurF66bt7d3ke90If77jtq0aVOx806cOFEAUPuef/DggahVq5ZwdnYWBQUFQgghFixYIPT19aXPwtdffy2cnJxEy5YtxdSpU4UQQhQUFAhzc3O1z3RJv4sLv2e8vb3VXt/09HRRtWpV4enpqfYaFLZTSNN3R3h4uFAoFOLatWtCiCefFwBiwYIFxW4PIZ58jz/9HVbS36fc3FxhZWUlWrRoIR4/fizVW716tQCg1mahdevWCQAiLi7uuX16Gg+HldKoUaPUnvv4+ODq1asvnG/Tpk1o0KAB6tevj9TUVOlRuIv+2d3wbdq0URtXJIRAVFQUunXrBiGEWhv+/v7IyMjAyZMn1doIDg5WOx5bOBi5sL+nTp1CQkICJk6cKP0FVqhwL0FaWhp+//139OvXDw8ePJCWee/ePfj7++Py5csad2WXFRMTE+kvVwCoV68ezM3N0aBBA7W9K4X/L1w3bbbX0wrPIircq/Q8BQUF2LNnD3r27InatWtL5fb29hg4cCAOHTpU5KykYcOGqe2J8fT0hBACw4YNk8r09fXh4eGh8f3Vs2dPVK9eXXresmVLeHp6YufOnVKZsbGx9P9Hjx4hNTUVrVq1AgCN6/7se1uTwvfJtm3boFKpNNbZuXMn7OzsMGDAAKnMwMAA48ePx8OHD/HHH3+o1Q8MDISFhYX0/Nn3KQA4OztDCFGqvUD16tWDjY0NatWqhQ8++ACurq7YsWMHKleuDH19femzoVKpkJaWhvz8fHh4eGjcNi/q4507d/D3339jyJAhMDMzk+p16NBB4/jAp1+b+/fvIyMjAz4+PmrLLsm21qRu3bpwd3fHhg0bpLKCggJs3rwZ3bp1k5Ztbm6OrKws7N27t8RtP8vCwgIBAQFYv349gCd7zFq3bq1xL8rx48eRkpKCMWPGqI3j6dKlC+rXry8dMirttiyOpu/qe/fuvfAMwXv37qm91qWxc+dOtGzZUjqcDDz5Dhs5ciQSExNx7tw5qS8FBQX4888/ATzZ4+Pj4wMfHx/ExsYCAM6cOYP09HTpvabNd/GIESOgr68vPd+7dy8ePHiAadOmFRlL9fT30dPvz6ysLKSmpqJ169YQQuDUqVNSHUNDQxw4cAD3798v9bZ60e/T8ePHce/ePYwYMUJtXOJ7771X7OtTWF6aMxIZgkrByMgINjY2amUWFhYlegNcvnwZZ8+ehY2Njdqjbt26AJ6MoXharVq11J7fvXsX6enpWLFiRZE2goODNbZRs2bNIn0FIPW38PBAccfuASA+Ph5CCMyYMaPIcgvPfnt2uWWpRo0aRQ7bmJmZwdHRsUgZ8N+6abO9nla4K/XBgwcv7OPdu3eRnZ2NevXqFZnWoEEDqFQq3LhxQ6382demsP+a1kvT+6tOnTpFyurWrat2LD4tLQ0TJkyAra0tjI2NpUAAABkZGUXmf/Y9p0lgYCDefvttDB8+HLa2tujfvz82btyo9iN97do11KlTB3p66l8vDRo0kKY/7UXvU21FRUVh7969OHDgAOLj43HmzBk0b95cmh4ZGYkmTZrAyMgIVlZWsLGxwY4dOzRumxf1sXCdNL0umt4X27dvR6tWrWBkZARLS0vY2Nhg2bJlassuybYuTmBgIA4fPiz9KB44cAApKSkIDAyU6owZMwZ169ZFp06dUKNGDQwdOlQae1gaAwcOlA6BbN26VTqk8qzCbaRpe9SvX1+aXtptWZyXeV+JYg4Hv8i1a9eK/R4onA4Ab731FipXriwFnsIQ5Ovri+PHj+PRo0fStMJApc138bOf6ZJ85wPA9evXERQUBEtLS2n8a5s2bQD8992hVCoxb9487Nq1C7a2tvD19cX8+fORlJRUom1V0s/Us2foVapUqdhDbYWvW2nO/uSYoFJ4OlGXlkqlgpubG7788kuN05/98Xs6iRfODwCDBg3CkCFDNLbRpEkTtefF9bc0H/DC5YaGhsLf319jnfI8jbS4dXjRummzvZ5Wv359AMC///5bZHxIWSjNemn7hdyvXz/8+eef+PDDD+Hu7g4TExOoVCoEBARo/CF99j2nibGxMQ4ePIj9+/djx44diI6OxoYNG9CuXTvs2bNHq89IWbxPNfH19ZXODnvWzz//jKCgIPTs2RMffvghqlWrBn19fYSHh0s/FOXVx9jYWHTv3h2+vr749ttvYW9vDwMDA6xatUptcOrLbOvAwEBMnz4dmzZtwsSJE7Fx40aYmZkhICBAqlOtWjX8/fff2L17N3bt2oVdu3Zh1apVGDx4cJFB7M/TvXt3KJVKDBkyBLm
5uejXr1+pt0l50PY1s7KyeukA/iIGBgbw9PTEwYMHER8fj6SkJPj4+MDW1haPHz9GXFwcYmNjUb9+fekPb22+i0vymX5WQUEBOnTogLS0NEydOhX169dHlSpVcOvWLQQFBal9d0ycOBHdunXD1q1bsXv3bsyYMQPh4eH4/fff0axZs+cupzw+94WvW3Gfe00YgiqIi4sL/vnnH7Rv316ra5TY2NigatWqKCgo0PqsBU19Ap7sdi2uzcLDOwYGBmW23IrwsturU6dO0NfXx88///zCwdE2NjaoXLkyLl68WGTahQsXoKenVyTkvqzLly8XKbt06ZL0F9L9+/cRExOD2bNnq11ATNN8paWnp4f27dujffv2+PLLL/HFF1/g448/xv79++Hn5wcnJyecPn0aKpVKbW/QhQsXAECrAadlbfPmzahduza2bNmi9nnU9tpeheukafs++76IioqCkZERdu/eDaVSKZWvWrWqyLwv2tbFqVWrFlq2bIkNGzZg7Nix2LJlC3r27Km2PODJ6cvdunVDt27doFKpMGbMGHz33XeYMWNGif+4MTY2Rs+ePfHzzz+jU6dOxf4AFW6jixcvSsMACl28eFGaXpptWR7q16+v9an9Tk5OxX4PFE4v5OPjg3nz5mHfvn2wtrZG/fr1oVAo0KhRI8TGxiI2NhZdu3aV6pfFd/HT3/nFvb7//vsvLl26hMjISLUTMoo7bOri4oLJkydj8uTJuHz5Mtzd3bFo0SL8/PPPWvWxUOG2io+PVzvbMT8/H4mJiRr/iE1ISICenp50hKUkeDisgvTr1w+3bt3CypUri0zLycmRRu4XR19fX7ruhqZLg9+9e7fUfXrrrbdQq1YtREREqJ2WCfyXxqtVq4a2bdviu+++03h6sjbLrQgvu70cHR0xYsQI7NmzB998802R6SqVCosWLcLNmzehr6+Pjh07Ytu2bWqHo5KTk7Fu3Tp4e3uX/EyFEtq6dava8f9jx44hLi4OnTp1AvDfX1nP/lUVERHxUstNS0srUla4p6zwUg+dO3dGUlKS2piU/Px8fPPNNzAxMZF2q5dGWZwi/zRN2ycuLg5HjhzRqj17e3u4u7sjMjJS7ZDW3r17pXEgTy9boVBIl9YAnpyK/+wZoiXZ1s8TGBiIo0eP4scff0RqaqraoTAARS6VoKenJ/2wlKT9p4WGhmLmzJmYMWNGsXU8PDxQrVo1LF++XK39Xbt24fz589IZUaXZluXBy8sL9+/fL9FYz2d17twZx44dU3sfZWVlYcWKFXB2dlYb0+Tj44Pc3FxERETA29tbCuM+Pj5Ys2YNbt++rXZR2bL4Lu7YsSOqVq2K8PDwImfvFX4WNH02hBBFLp2QnZ1dpA0XFxdUrVq11O8fTTw8PGBlZYWVK1ciPz9fKl+7dm2xe+pOnDiBRo0aqY0lexHuCaog77//PjZu3IhRo0Zh//79ePvtt1FQUIALFy5g48aN2L17Nzw8PJ7bxty5c7F//354enpixIgRaNiwIdLS0nDy5Ens27dP45fm8+jp6WHZsmXo1q0b3N3dERwcDHt7e1y4cAFnz57F7t27ATy5ToO3tzfc3NwwYsQI1K5dG8nJyThy5Ahu3ryJf/75R+vtUp5ednstWrQIV65cwfjx47FlyxZ07doVFhYWuH79OjZt2oQLFy5Ig7bnzJkjXdNlzJgxqFSpEr777jvk5uZi/vz5Zb5urq6u8Pb2xujRo6UvUisrK0yZMgXAkzFNhcfoHz9+jOrVq2PPnj1ISEh4qeV++umnOHjwILp06QInJyekpKTg22+/RY0aNaSxCyNHjsR3332HoKAgnDhxAs7Ozti8eTMOHz6MiIiIEg02f1ZZnCL/tK5du2LLli1499130aVLFyQkJGD58uVo2LCh2jVZSiM8PBxdunSBt7c3hg4dirS0NHzzzTdo1KiRWptdunTBl19+iYCAAAwcOBApKSlYunQpXF1dcfr0aaleSbb18/Tr1w+hoaEIDQ2FpaVlkb0Hw4cPR1paGtq1a4caNWrg2rVr+Oabb+Du7i6NYSmppk2bomnTps+tY2BggHnz5iE4OBht2rTBgAEDpFPknZ2dMWnSJKluSbdleejSpQsqVaqEffv2YeTIkUWmR0VFSXt2njZkyBBMmzYN69evR6dOnTB+/HhYWloiMjISCQkJiIqKUtsz6uXlhUqVKuHixYtqy/H19cWyZcsAQC0EAS//XWxqaoqvvvoKw4cPR4sWLaTrgv3zzz/Izs5GZGQk6tevDxcXF4SGhuLWrVswNTVFVFRUkeBx6dIltG/fHv369UPDhg1RqVIl/Prrr0hOTlY7mUVbhoaGmDVrFsaNG4d27dqhX79+SExMxOrVq+Hi4lLkiMrjx4+l6yKVSonPI5OR4k6Rr1KlSpG6z55CLITmU+SFeHKK6Lx580SjRo2EUqkUFhYWonnz5mL27NkiIyNDqof/f/qpJsnJyeJ///ufcHR0FAYGBsLOzk60b99erFixQqpT3Kmchaf7PnuK+qFDh0SHDh1E1apVRZUqVUSTJk2KnF575coVMXjwYGFnZycMDAxE9erVRdeuXcXmzZs19lMTbU6Rb9SoUZG6xW1fTdutJNvrefLz88X3338vfHx8hJmZmTAwMBBOTk4iODi4yOnzJ0+eFP7+/sLExERUrlxZvPPOO+LPP/9Uq6PpvSWEKHI6caFn33eFr+GCBQvEokWLhKOjo1AqlcLHx0f8888/avPevHlTvPvuu8Lc3FyYmZmJvn37itu3bxd7SrGmU7GffX/HxMSIHj16CAcHB2FoaCgcHBzEgAEDxKVLl9TmS05OFsHBwcLa2loYGhoKNze3Iq/70+vyrGf7qM0p8s87tVylUokvvvhCODk5CaVSKZo1aya2b98uhgwZonYKdmn6KIQQUVFRokGDBkKpVIqGDRuKLVu2FGlTCCF++OEHUadOHaFUKkX9+vXFqlWrtN7Wz/P2228LAGL48OFFpm3evFl07NhRVKtWTRgaGoqaNWuKDz74QNy5c+eF7T7vO6pQca/Dhg0bRLNmzYRSqRSWlpbivffeU7vcQ6GSbsuSvp8LP3sJCQkvXL/u3buL9u3bq5UVfkcV9yg8Lf7KlSuiT58+wtzcXBgZGYmWLVuK7du3a1xOixYtipzSffPmTQFAODo6apynJN/FxX3PFPrtt99E69athbGxsTA1NRUtW7YU69evl6afO3dO+Pn5CRMTE2FtbS1GjBgh/vnnH7Xv79TUVPG///1P1K9fX1SpUkWYmZkJT09PsXHjRrVlFXeKfEl/nwovH6BUKkXLli3F4cOHRfPmzUVAQIBavV27dgkA4vLlyxrXuTgKIV5y9CERVZjExETUqlULCxYsQGhoqK67Q/RGKrwq8YULFzSepUa6o1KpYGNjg169eqkNL+nZsycUCkWxVxYvDscEERERPcXHxwcdO3Ysl0PZVHKPHj0qMq7xp59+QlpamtptMwrvcvDZZ5+VehkcE0RERPSMXbt26boLsnf06FFMmjQJffv2hZWVFU6ePIkffvgBjR
s3Rt++faV6DRo0UBs8XRoMQURERPTKcXZ2hqOjI77++mukpaXB0tISgwcPxty5c8vs7vQcE0RERESyxDFBREREJEsMQURERCRLDEFEhAMHDkChUGDz5s267kqJJCcno0+fPrCysoJCoXjpK2ETkTwxBBFVkNWrV0OhUMDIyEjtlheF2rZt+8K7O9MTkyZNwu7duzF9+nSsWbNG7cagz1IoFGoPU1NTtGnTBjt27KjAHpe9ixcvYtKkSWjdujWMjIygUCjUbtvytA0bNmDQoEGoU6cOFAqF2unFzzpx4gQCAgJgamqKqlWromPHjvj777+L1Hv8+DFmz56N2rVrQ6lUonbt2pgzZ47Gs3Ryc3MxdepUODg4wNjYGJ6ensXei4qoIjEEEVWw3NxczJ07V9fdeK39/vvv6NGjB0JDQzFo0CDUr1//ufU7dOiANWvW4KeffsKUKVMQHx+Pbt26SbeGeR0dOXIEX3/9NR48ePDC21wsW7YM27Ztg6OjIywsLIqtd/LkSXh7e+Pq1auYOXMmwsLCcPnyZbRp06bIjUEHDRqE2bNno127dli8eDF8fX0xY8YMjbctCAoKwpdffon33nsPixcvhr6+Pjp37oxDhw5pt/JEZaVU15cmIq0VXsre3d1dKJVKcevWLbXpxd0mpCIUdyn7svbw4cMyaUehULzwtg2FoOEWD+fOnRMARKdOncqkP7pw7949kZmZKYQQYsGCBc+9JcT169dFQUGBEEKIRo0aqd3G4GmdO3cWFhYWIjU1VSq7ffu2MDExEb169ZLKjh07JgCIGTNmqM0/efJkoVAo1G7hEhcXV+TWIzk5OcLFxUV4eXmVap2Jyhr3BBFVsI8++ggFBQUv3BuUmJgIhUKh8YahCoUCs2bNkp7PmjULCoUCly5dwqBBg2BmZgYbGxvMmDEDQgjcuHEDPXr0gKmpKezs7LBo0SKNyywoKMBHH30EOzs7VKlSBd27d8eNGzeK1IuLi0NAQADMzMxQuXJltGnTBocPH1arU9inc+fOSTdqfNGNP69evYq+ffvC0tISlStXRqtWrdQOWxUeUhRCYOnSpdIhrtJq0KABrK2tceXKFbXylJQUDBs2DLa2tjAyMkLTpk0RGRmpVuett95Cr1691Mrc3NygUCjUboC6YcMGKBQKnD9/HgDw4MEDTJw4Ec7OzlAqlahWrRo6dOiAkydPSvNkZ2fjwoULSE1NfeE6WFpalvhmtI6Ojmo37yxObGws/Pz8YGVlJZXZ29ujTZs22L59u3Tz0tjYWAAocqPM/v37QwiBDRs2SGWbN2+Gvr6+2k1CjYyMMGzYMBw5ckTj+4uoojAEEVWwWrVqYfDgwVi5ciVu375dpm0HBgZCpVJh7ty58PT0xJw5cxAREYEOHTqgevXqmDdvHlxdXREaGoqDBw8Wmf/zzz/Hjh07MHXqVIwfPx579+6Fn58fcnJypDq///47fH19kZmZiZkzZ+KLL75Aeno62rVrh2PHjhVps2/fvsjOzsYXX3yBESNGFNv35ORktG7dGrt378aYMWPw+eef49GjR+jevbt0PyBfX1+sWbMGwH+HuAqfl0ZGRgbu37+vdmgoJycHbdu2xZo1a/Dee+9hwYIFMDMzQ1BQEBYvXizV8/HxUTuMk5aWhrNnz0JPT08KB8CToGBjYyMdqho1ahSWLVuG3r1749tvv0VoaCiMjY2lkAQAx44dQ4MGDbBkyZJSr1NZyM3NhbGxcZHyypUrIy8vD2fOnJHqAShSt3LlygCejCsqdOrUKdStWxempqZqdVu2bAkAGscbEVUYHe+JIpKNp+/sfOXKFVGpUiUxfvx4afqzh8OKu6uyEMXfOXvkyJFSWX5+vqhRo4ZQKBRi7ty5Uvn9+/eFsbGx2h3ZCw+HVa9eXTrEIoQQGzduFADE4sWLhRBP7r5ep04d4e/vL1QqlVQvOztb1KpVS3To0KFInwYMGFCi7TNx4kS1u3ELIcSDBw9ErVq1hLOzs3Q4p3D9S3M4bNiwYeLu3bsiJSVFHD9+XAQEBBQ5RBMRESEAiJ9//lkqy8vLE15eXsLExETaLps2bRIAxLlz54QQT+7IrVQqRffu3UVgYKA0b5MmTcS7774rPTczM3thnwtfh2fvTv8iLzoc9rTnHQ5zc3MTdevWFfn5+VJZbm6uqFmzpgAg3ak8KipKABBr1qxRm3/58uUCgGjcuLHa8tq1a1dkWWfPnhUAxPLly0uwhkTlg3uCiHSgdu3aeP/997FixQrcuXOnzNodPny49H99fX14eHhACIFhw4ZJ5ebm5qhXrx6uXr1aZP7BgwerHWLp06cP7O3tsXPnTgBP/mq/fPkyBg4ciHv37iE1NRWpqanIyspC+/btcfDgQahUKrU2R40aVaK+79y5Ey1btlQ7ZGZiYoKRI0ciMTER586dK9lG0OCHH36AjY0NqlWrBg8PD8TExGDKlCkICQlRW76dnR0GDBgglRkYGGD8+PF4+PAh/vjjDwBP9gQBkPakxcbGokWLFujQoYO0Jyg9PR1nzpyR6gJPtntcXNxz9/61bdsWQgi1Q50VacyYMbh06RKGDRuGc+fO4cyZMxg8eLD0Hi3cI9i5c2c4OTkhNDQUW7ZswbVr17Bx40Z8/PHHqFSpktqew5ycHCiVyiLLMjIyUmuTSBcYgoh05JNPPkF+fn6ZnilWs2ZNtedmZmYwMjKCtbV1kfL79+8Xmb9OnTpqzxUKBVxdXaVTry9fvgwAGDJkCGxsbNQe33//PXJzc5GRkaHWRq1atUrU92vXrqFevXpFygsPJ127dq1E7WjSo0cP7N27Fzt27JDGKmVnZ6uNk7l27Rrq1KlTZOzMs8u3tbVFnTp1pMATGxsLHx8f+Pr64vbt27h69SoOHz4MlUqlFoLmz5+PM2fOwNHRES1btsSsWbM0BlFdGjVqFD766COsW7cOjRo1gpubG65cuYIpU6YAeBJKgScBZseOHbCyskLv3r3h7OyMwYMHIywsDJaWllI94Mkhs8LDZ0979OiRNJ1IV3gDVSIdqV27NgYNGoQVK1Zg2rRpRaYXN+C3oKCg2Db19fVLVAYAQovbBhbu5VmwYAHc3d011nn6BxB4NX7katSoAT8/PwBP9mJYW1tj7NixeOedd4oMci4Jb29vxMTEICcnBydOnEBYWBgaN24Mc3NzxMbG4vz58zAxMUGzZs2kefr16wcfHx/8+uuv2LNnDxYsWIB58+Zhy5Yt6NSpU5mt68v6/PPPERoairNnz8LMzAxubm746KOPAAB169aV6jVq1AhnzpzBuXPncP/+fTRs2BDGxsaYNGkS2rRpI9Wzt7fXeF2swr1LDg4O5bxGRMXjniAiHSrcGzRv3rwi0woH7aanp6uVv8wekRcp3NNTSAiB+Ph4ODs7AwBcXFwAAKampvDz89P4MDAw0GrZTk5ORa5FAwAXLlyQppeVDz74AC4uLvjkk0+kMOjk5ITLly8XOZynafk+Pj64fv06fvnlFxQUFKB169bQ09ODt7c3YmNjERsbi9atWxcJoPb29hgzZgy2b
t2KhIQEWFlZ4fPPPy+z9SorhWfyubm5AQD27duHGjVqFLkek0KhQKNGjeDt7Q1LS0vs378fKpVKCpwA4O7ujkuXLiEzM1Nt3ri4OGk6ka4wBBHpkIuLCwYNGoTvvvsOSUlJatNMTU1hbW1d5Cyub7/9ttz689NPP+HBgwfS882bN+POnTvSnormzZvDxcUFCxculE6Xftrdu3e1Xnbnzp1x7NgxHDlyRCrLysrCihUr4OzsjIYNG2rd9rMqVaqEyZMn4/z589i2bZu0/KSkJLXTu/Pz8/HNN9/AxMREbe9G4WGuefPmoUmTJjAzM5PKY2JicPz4cbVDYQUFBUUOE1arVg0ODg5qh4pKc4p8RdmwYQP++usvTJw48bmn2efk5GDGjBmwt7dXG1fVp08fFBQUYMWKFVJZbm4uVq1aBU9PTzg6OpZr/4meh4fDiHTs448/xpo1a3Dx4kU0atRIbdrw4cMxd+5cDB8+HB4eHjh48CAuXbpUbn2xtLSEt7c3goODkZycjIiICLi6ukqntuvp6eH7779Hp06d0KhRIwQHB6N69eq4desW9u/fD1NTU/zf//2fVsueNm0a1q9fj06dOmH8+PGwtLREZGQkEhISEBUVVaLr3JRGUFAQwsLCMG/ePPTs2RMjR47Ed999h6CgIJw4cQLOzs7YvHkzDh8+jIiICLUB466urrCzs8PFixcxbtw4qdzX1xdTp04FALUQ9ODBA9SoUQN9+vRB06ZNYWJign379uGvv/5Su2bTsWPH8M4772DmzJkvHBydkZGBb775BgCkazQtWbIE5ubmMDc3x9ixY6W6Bw8elML03bt3kZWVhTlz5kh99vX1lep9+umn6NixI6ysrHD06FGsWrUKAQEBmDBhgtry+/XrBwcHBzRs2BCZmZn48ccfcfXqVezYsUNtW3l6eqJv376YPn06UlJS4OrqisjISCQmJuKHH354watEVM50em4akYw8fYr8s4YMGSIAFLlidHZ2thg2bJgwMzMTVatWFf369RMpKSnFniJ/9+7dIu1WqVKlyPKePR2/8NTs9evXi+nTp4tq1aoJY2Nj0aVLF3Ht2rUi8586dUr06tVLWFlZCaVSKZycnES/fv1ETEzMC/v0PFeuXBF9+vQR5ubmwsjISLRs2VJs3769SD2U8hT54urOmjVLABD79+8XQgiRnJwsgoODhbW1tTA0NBRubm4aL1EghBB9+/YVAMSGDRuksry8PFG5cmVhaGgocnJypPLc3Fzx4YcfiqZNm4qqVauKKlWqiKZNm4pvv/1Wrc3SnCJfeAkFTQ8nJye1uoWvhabH08uKj48XHTt2FNbW1kKpVIr69euL8PBwkZubW2T58+bNE/Xr1xdGRkbCwsJCdO/eXZw6dUpjX3NyckRoaKiws7MTSqVStGjRQkRHR79wHYnKm0IILUZHEhEREb3mOCaIiIiIZIkhiIiIiGSJIYiIiIhkiSGIiIiIZIkhiIiIiGSJIYiIiIhkiRdL1EClUuH27duoWrVqsfdvIiIioleLEAIPHjyAg4NDiS6wyhCkwe3bt3kpdyIiotfUjRs3UKNGjRfWYwjSoPCS7zdu3ICpqamOe0NEREQlkZmZCUdHR7VbtzwPQ5AGhYfATE1NGYKIiIheMyUdysKB0URERCRLDEFEREQkSwxBREREJEsMQURERCRLDEFEREQkSwxBREREJEsMQURERCRLDEFEREQkSwxBREREJEsMQURERCRLDEFEREQkSwxBREREJEsMQURERCRLDEFEREQkS5V03QEiojfSOoWue0D06hoodN0DANwTRERERDLFEERERESyxBBEREREssQQRERERLLEEERERESyxBBEREREssQQRERERLLEEERERESyxBBEREREssQQRERERLLEEERERESyxBBEREREssQQRERERLLEEERERESyxBBEREREssQQRERERLLEEERERESyxBBEREREssQQRERERLLEEERERESy9EqEoKVLl8LZ2RlGRkbw9PTEsWPHiq27cuVK+Pj4wMLCAhYWFvDz8ytSPygoCAqFQu0REBBQ3qtBRERErxGdh6ANGzYgJCQEM2fOxMmTJ9G0aVP4+/sjJSVFY/0DBw5gwIAB2L9/P44cOQJHR0d07NgRt27dUqsXEBCAO3fuSI/169dXxOoQERHRa0IhhBC67ICnpydatGiBJUuWAABUKhUcHR0xbtw4TJs27YXzFxQUwMLCAkuWLMHgwYMBPNkTlJ6ejq1bt2rVp8zMTJiZmSEjIwOmpqZatUFEMrdOoeseEL26BpZP9Cjt77dO9wTl5eXhxIkT8PPzk8r09PTg5+eHI0eOlKiN7OxsPH78GJaWlmrlBw4cQLVq1VCvXj2MHj0a9+7dK7aN3NxcZGZmqj2IiIjozabTEJSamoqCggLY2tqqldva2iIpKalEbUydOhUODg5qQSogIAA//fQTYmJiMG/ePPzxxx/o1KkTCgoKNLYRHh4OMzMz6eHo6Kj9ShEREdFroZKuO/Ay5s6di19++QUHDhyAkZGRVN6/f3/p/25ubmjSpAlcXFxw4MABtG/fvkg706dPR0hIiPQ8MzOTQYiIiOgNp9M9QdbW1tDX10dycrJaeXJyMuzs7J4778KFCzF37lzs2bMHTZo0eW7d2rVrw9raGvHx8RqnK5VKmJqaqj2IiIjozabTEGRoaIjmzZsjJiZGKlOpVIiJiYGXl1ex882fPx+fffYZoqOj4eHh8cLl3Lx5E/fu3YO9vX2Z9JuIiIhefzo/RT4kJAQrV65EZGQkzp8/j9GjRyMrKwvBwcEAgMGDB2P69OlS/Xnz5mHGjBn48ccf4ezsjKSkJCQlJeHhw4cAgIcPH+LDDz/E0aNHkZiYiJiYGPTo0QOurq7w9/fXyToSERHRq0fnY4ICAwNx9+5dhIWFISkpCe7u7oiOjpYGS1+/fh16ev9ltWXLliEvLw99+vRRa2fmzJmYNWsW9PX1cfr0aURGRiI9PR0ODg7o2LEjPvvsMyiVygpdNyIiInp16fw6Qa8iXieIiF4arxNEVDxeJ4iIiIhIdxiCiIiISJYYgoiIiEiWGIKIiIhIlhiCiIiISJYYgoiIiEiWGIKIiIhIlhiCiIiISJYYgoiIiEiWGIKIiIhIlhiCiIiISJYYgoiIiEiWGIKIiIhIlhiCiIiISJYYgoiIiEiWGIKIiIhIlhiCiIiISJYYgoiIiEiWGIKIiIhIlhiCiIiISJYYgoiIiEiWGIKIiIhIlhiCiIiISJYYgoiIiEiWGIKIiIhIlhiCiIiISJYYgoiIiEiWGIKIiIhIlhiCiIiISJYYgoiIiEiWGIKIiIhIlhiCiIiISJYYgoiIiEiWGIKIiIhIlhiCiIiISJYYgoiIiEiWGIKIiIhIlhiCiIiISJYYgoiIiEiWGIKIiIhIlhiCiIiISJYYgoiIiEiWGIKIiIhIlhiCiIiISJYYgoiIiEiWGIKIiIhIlhiCiIiISJYYgoiIiEiWGIKIiIhIlhiCiIiISJYYgoiIiEiWGIKIiIhIlhiCiIiI
SJYYgoiIiEiWXokQtHTpUjg7O8PIyAienp44duxYsXVXrlwJHx8fWFhYwMLCAn5+fkXqCyEQFhYGe3t7GBsbw8/PD5cvXy7v1SAiIqLXiM5D0IYNGxASEoKZM2fi5MmTaNq0Kfz9/ZGSkqKx/oEDBzBgwADs378fR44cgaOjIzp27Ihbt25JdebPn4+vv/4ay5cvR1xcHKpUqQJ/f388evSoolaLiIiIXnEKIYTQZQc8PT3RokULLFmyBACgUqng6OiIcePGYdq0aS+cv6CgABYWFliyZAkGDx4MIQQcHBwwefJkhIaGAgAyMjJga2uL1atXo3///i9sMzMzE2ZmZsjIyICpqenLrSARydM6ha57QPTqGlg+0aO0v9863ROUl5eHEydOwM/PTyrT09ODn58fjhw5UqI2srOz8fjxY1haWgIAEhISkJSUpNammZkZPD09i20zNzcXmZmZag8iIiJ6s+k0BKWmpqKgoAC2trZq5ba2tkhKSipRG1OnToWDg4MUegrnK02b4eHhMDMzkx6Ojo6lXRUiIiJ6zeh8TNDLmDt3Ln755Rf8+uuvMDIy0rqd6dOnIyMjQ3rcuHGjDHtJREREr6JKuly4tbU19PX1kZycrFaenJwMOzu75867cOFCzJ07F/v27UOTJk2k8sL5kpOTYW9vr9amu7u7xraUSiWUSqWWa0FERESvI53uCTI0NETz5s0RExMjlalUKsTExMDLy6vY+ebPn4/PPvsM0dHR8PDwUJtWq1Yt2NnZqbWZmZmJuLi457ZJRERE8qLTPUEAEBISgiFDhsDDwwMtW7ZEREQEsrKyEBwcDAAYPHgwqlevjvDwcADAvHnzEBYWhnXr1sHZ2Vka52NiYgITExMoFApMnDgRc+bMQZ06dVCrVi3MmDEDDg4O6Nmzp65Wk4iIiF4xOg9BgYGBuHv3LsLCwpCUlAR3d3dER0dLA5uvX78OPb3/dlgtW7YMeXl56NOnj1o7M2fOxKxZswAAU6ZMQVZWFkaOHIn09HR4e3sjOjr6pcYNERER0ZtF59cJehXxOkFE9NJ4nSCi4vE6QURERES6wxBEREREssQQRERERLLEEERERESyxBBEREREssQQRERERLLEEERERESyxBBEREREssQQRERERLLEEERERESyxBBEREREssQQRERERLLEEERERESyxBBEREREssQQRERERLLEEERERESyxBBEREREssQQRERERLLEEERERESyxBBEREREslRJm5kSEhIQGxuLa9euITs7GzY2NmjWrBm8vLxgZGRU1n0kIiIiKnOlCkFr167F4sWLcfz4cdja2sLBwQHGxsZIS0vDlStXYGRkhPfeew9Tp06Fk5NTefWZiIiI6KWVOAQ1a9YMhoaGCAoKQlRUFBwdHdWm5+bm4siRI/jll1/g4eGBb7/9Fn379i3zDhMRERGVBYUQQpSk4u7du+Hv71+iRu/du4fExEQ0b978pTqnK5mZmTAzM0NGRgZMTU113R0ieh2tU+i6B0SvroElih6lVtrf7xLvCSppAAIAKysrWFlZlbg+ERERUUXT6uywkydP4t9//5Web9u2DT179sRHH32EvLy8MuscERERUXnRKgR98MEHuHTpEgDg6tWr6N+/PypXroxNmzZhypQpZdpBIiIiovKgVQi6dOkS3N3dAQCbNm2Cr68v1q1bh9WrVyMqKqos+0dERERULrQKQUIIqFQqAMC+ffvQuXNnAICjoyNSU1PLrndERERE5USrEOTh4YE5c+ZgzZo1+OOPP9ClSxcATy6iaGtrW6YdJCIiIioPWoWgiIgInDx5EmPHjsXHH38MV1dXAMDmzZvRunXrMu0gERERUXko8XWCSuLRo0fQ19eHgYFBWTWpE7xOEBG9NF4niKh4r9t1gkqC9w0jIiKi10WJQ5CFhQUUipL9ZZOWlqZ1h4iIiIgqQolDUEREhPT/e/fuYc6cOfD394eXlxcA4MiRI9i9ezdmzJhR5p0kIiIiKmtajQnq3bs33nnnHYwdO1atfMmSJdi3bx+2bt1aVv3TCY4JIqKXxjFBRMV7RcYEaXV22O7duxEQEFCkPCAgAPv27dOmSSIiIqIKpVUIsrKywrZt24qUb9u2jTdOJSIioteCVmeHzZ49G8OHD8eBAwfg6ekJAIiLi0N0dDRWrlxZph0kIiIiKg9ahaCgoCA0aNAAX3/9NbZs2QIAaNCgAQ4dOiSFIiIiIqJXmdbXCfL09MTatWvLsi9EREREFUbrEKRSqRAfH4+UlBTpZqqFfH19X7pjREREROVJqxB09OhRDBw4ENeuXcOzZ9grFAoUFBSUSeeIiIiIyotWIWjUqFHw8PDAjh07YG9vX+IrSRMRERG9KrQKQZcvX8bmzZulu8cTERERvW60uk6Qp6cn4uPjy7ovRERERBVGqz1B48aNw+TJk5GUlAQ3NzcYGBioTW/SpEmZdI6IiIiovGh17zA9vaI7kBQKBYQQb8TAaN47jIheGu8dRlS8V+TeYVrtCUpISNBmNiIiIqJXhlYhyMnJqaz7QURERFShtL5Y4pUrVxAREYHz588DABo2bIgJEybAxcWlzDpHREREVF60Ojts9+7daNiwIY4dO4YmTZqgSZMmiIuLQ6NGjbB3796y7iMRERFRmdNqYHSzZs3g7++PuXPnqpVPmzYNe/bswcmTJ8usg7rAgdFE9NI4MJqoeK/IwGit9gSdP38ew4YNK1I+dOhQnDt3TpsmiYiIiCqUViHIxsYGf//9d5Hyv//+G9WqVXvZPhERERGVO61C0IgRIzBy5EjMmzcPsbGxiI2Nxdy5c/HBBx9gxIgRpWpr6dKlcHZ2hpGRETw9PXHs2LFi6549exa9e/eGs7MzFAoFIiIiitSZNWsWFAqF2qN+/fqlXUUiIiJ6w2l1dtiMGTNQtWpVLFq0CNOnTwcAODg4YNasWRg/fnyJ29mwYQNCQkKwfPlyeHp6IiIiAv7+/rh48aLGPUrZ2dmoXbs2+vbti0mTJhXbbqNGjbBv3z7peaVKWp8ER0RERG8orQZGP+3BgwcAgKpVq5Z6Xk9PT7Ro0QJLliwBAKhUKjg6OmLcuHGYNm3ac+d1dnbGxIkTMXHiRLXyWbNmYevWrRoP15UUB0YT0UvjwGii4r3OA6MTEhJw+fJlAE/CT2EAunz5MhITE0vURl5eHk6cOAE/P7//OqOnBz8/Pxw5ckSbbkkuX74MBwcH1K5dG++99x6uX7/+3Pq5ubnIzMxUexAREdGbTasQFBQUhD///LNIeVxcHIKCgkrURmpqKgoKCmBra6tWbmtri6SkJG26BeDJ3qXVq1cjOjoay5YtQ0JCAnx8fKQ9VpqEh4fDzMxMejg6Omq9fCIiIno9aBWCTp06hbfffrtIeatWrV7qMFRZ6NSpE/r27YsmTZrA398fO3fuRHp6OjZu3FjsPNOnT0dGRob0uHHjRgX2mIiIiHRBqxHDCoVC456VjIyMEt9B3traGvr6+khOTlYrT05Ohp2dnTbd0sjc3Bx169ZFfHx8sXWUSiWUSmWZLZO
IiIhefVrtCfL19UV4eLha4CkoKEB4eDi8vb1L1IahoSGaN2+OmJgYqUylUiEmJgZeXl7adEujhw8f4sqVK7C3ty+zNomIiOj1p9WeoHnz5sHX1xf16tWDj48PACA2NhaZmZn4/fffS9xOSEgIhgwZAg8PD7Rs2RIRERHIyspCcHAwAGDw4MGoXr06wsPDATwZTF14Req8vDzcunULf//9N0xMTODq6goACA0NRbdu3eDk5ITbt29j5syZ0NfXx4ABA7RZVSIiInpDaRWCGjZsiNOnT2PJkiX4559/YGxsjMGDB2Ps2LGwtLQscTuBgYG4e/cuwsLCkJSUBHd3d0RHR0uDpa9fvw49vf92Vt2+fRvNmjWTni9cuBALFy5EmzZtcODAAQDAzZs3MWDAANy7dw82Njbw9vbG0aNHYWNjo82qEhER0Rvqpa8T9CbidYKI6KXxOkFExXudrxMEPDn8NWjQILRu3Rq3bt0CAKxZswaHDh3StkkiIiKiCqNVCIqKioK/vz+MjY1x8uRJ5ObmAnhydtgXX3xRph0kIiIiKg9ahaA5c+Zg+fLlWLlyJQwMDKTyt99+GydPniyzzhERERGVF61C0MWLF+Hr61uk3MzMDOnp6S/bJyIiIqJyp1UIsrOz03jxwUOHDqF27dov3SkiIiKi8qZVCBoxYgQmTJiAuLg4KBQK3L59G2vXrkVoaChGjx5d1n0kIiIiKnNaXSdo2rRpUKlUaN++PbKzs+Hr6wulUonQ0FCMGzeurPtIREREVOZe6jpBeXl5iI+Px8OHD9GwYUOYmJiUZd90htcJIqKXxusEERXvFblOkFZ7ggoZGhqiYcOGyMzMxL59+1CvXj00aNDgZZqUBQW/G4mKxcu3ElFF0WpMUL9+/bBkyRIAQE5ODlq0aIF+/fqhSZMmiIqKKtMOEhEREZUHrULQwYMHpRun/vrrr1CpVEhPT8fXX3+NOXPmlGkHiYiIiMqDViEoIyNDulFqdHQ0evfujcqVK6NLly64fPlymXaQiIiIqDxoFYIcHR1x5MgRZGVlITo6Gh07dgQA3L9/H0ZGRmXaQSIiIqLyoNXA6IkTJ+K9996DiYkJnJyc0LZtWwBPDpO5ubmVZf+IiIiIyoVWIWjMmDHw9PTE9evX0aFDB+jpPdmhVLt2bY4JIiIioteC1qfIN2/eHM2bN1cr69Kly0t3iIiIiKgilHhM0Ny5c5GTk1OiunFxcdixY4fWnSIiIiIqbyUOQefOnUPNmjUxZswY7Nq1C3fv3pWm5efn4/Tp0/j222/RunVrBAYGomrVquXSYSIiIqKyUOLDYT/99BP++ecfLFmyBAMHDkRmZib09fWhVCqRnZ0NAGjWrBmGDx+OoKAgniVGRERErzSt7h2mUqlw+vRpXLt2DTk5ObC2toa7uzusra3Lo48VrrzvHcbbZhAV7425bQbvHUZUvNf53mF6enpwd3eHu7u7NrMTERER6ZxWF0skIiIiet0xBBEREZEsMQQRERGRLDEEERERkSy9VAiKj4/H7t27pYsoanGiGREREZFOaBWC7t27Bz8/P9StWxedO3fGnTt3AADDhg3D5MmTy7SDREREROVBqxA0adIkVKpUCdevX0flypWl8sDAQERHR5dZ54iIiIjKi1bXCdqzZw92796NGjVqqJXXqVMH165dK5OOEREREZUnrfYEZWVlqe0BKpSWlgalUvnSnSIiIiIqb1qFIB8fH/z000/Sc4VCAZVKhfnz5+Odd94ps84RERERlRetDofNnz8f7du3x/Hjx5GXl4cpU6bg7NmzSEtLw+HDh8u6j0RERERlTqs9QY0bN8alS5fg7e2NHj16ICsrC7169cKpU6fg4uJS1n0kIiIiKnNa7QkCADMzM3z88cdl2RciIiKiCqN1CHr06BFOnz6NlJQUqFQqtWndu3d/6Y4RERERlSetQlB0dDQGDx6M1NTUItMUCgUKCgpeumNERERE5UmrMUHjxo1D3759cefOHahUKrUHAxARERG9DrQKQcnJyQgJCYGtrW1Z94eIiIioQmgVgvr06YMDBw6UcVeIiIiIKo5WY4KWLFmCvn37IjY2Fm5ubjAwMFCbPn78+DLpHBEREVF50SoErV+/Hnv27IGRkREOHDgAhUIhTVMoFAxBRERE9MrTKgR9/PHHmD17NqZNmwY9Pa2OqBERERHplFYJJi8vD4GBgQxARERE9NrSKsUMGTIEGzZsKOu+EBEREVUYrQ6HFRQUYP78+di9ezeaNGlSZGD0l19+WSadIyIiIiovWoWgf//9F82aNQMAnDlzRm3a04OkiYiIiF5VWoWg/fv3l3U/iIiIiCoURzYTERGRLJV4T1CvXr2wevVqmJqaolevXs+tu2XLlpfuGBEREVF5KnEIMjMzk8b7mJmZlVuHiIiIiCqCQgghSlr5008/RWhoKCpXrlyefdK5zMxMmJmZISMjA6ampmXePseOExWv5N9Ir7h1/KATFWtg+XzQS/v7XaoxQbNnz8bDhw+17hwRERHRq6JUIagUO42IiIiIXmmlPjuM1wEiIiKiN0GpQ1DdunVhaWn53EdpLF26FM7OzjAyMoKnpyeOHTtWbN2zZ8+id+/ecHZ2hkKhQERExEu3SURERPJU6oslzp49u8zODtuwYQNCQkKwfPlyeHp6IiIiAv7+/rh48SKqVatWpH52djZq166Nvn37YtKkSWXSJhEREclTqc4O09PTQ1JSUpmFCU9PT7Ro0QJLliwBAKhUKjg6OmLcuHGYNm3ac+d1dnbGxIkTMXHixDJrsxDPDiPSnTdm6CHPDiMq3ut4dlhZjgfKy8vDiRMn4Ofn919n9PTg5+eHI0eOVGibubm5yMzMVHsQERHRm01nZ4elpqaioKAAtra2auW2trZISkqq0DbDw8NhZmYmPRwdHbVaPhEREb0+ShWCVCrVGzmuZvr06cjIyJAeN27c0HWXiIiIqJxpdRf5smBtbQ19fX0kJyerlScnJ8POzq5C21QqlVAqlVotk4iIiF5POruLvKGhIZo3b46YmBipTKVSISYmBl5eXq9Mm0RERPRm0tmeIAAICQnBkCFD4OHhgZYtWyIiIgJZWVkIDg4GAAwePBjVq1dHeHg4gCcDn8+dOyf9/9atW/j7779hYmICV1fXErVJREREBOg4BAUGBuLu3bsICwtDUlIS3N3dER0dLQ1svn79OvT0/ttZdfv2bTRr1kx6vnDhQixcuBBt2rTBgQMHStQmEREREVDK6wTJBa8TRKQ7b8w3Eq8TRFS81/E6QURERERvCoYgIiIikiWGICIiIpIlhiAiIiKSJYYgIiIikiWGICIiIpIlhiAiIiKSJYYgIiIikiWGICIiIpIlhiAiIiKSJYYgIiIikiWGICIiIpIlhiAiIiKSJYYgIiIikiWGICIiIpIlhiAiIiKSJYYgIiIikiWGICIiIpIlhiAiIiKSJYYgIiIikiWGICIiIpIlhiAiIiKSJYYgIiIikiWGICIiIpIlhiAiIiKSJYYgIiIikiWGICIiIpIlhiAiIiKSJYYgIi
IikiWGICIiIpIlhiAiIiKSJYYgIiIikiWGICIiIpIlhiAiIiKSJYYgIiIikiWGICIiIpIlhiAiIiKSJYYgIiIikiWGICIiIpIlhiAiIiKSJYYgIiIikiWGICIiIpIlhiAiIiKSJYYgIiIikiWGICIiIpIlhiAiIiKSJYYgIiIikiWGICIiIpIlhiAiIiKSJYYgIiIikiWGICIiIpIlhiAiIiKSJYYgIiIikiWGICIiIpIlhiAiIiKSpVciBC1duhTOzs4wMjKCp6cnjh079tz6mzZtQv369WFkZAQ3Nzfs3LlTbXpQUBAUCoXaIyAgoDxXgYiIiF4zOg9BGzZsQEhICGbOnImTJ0+iadOm8Pf3R0pKisb6f/75JwYMGIBhw4bh1KlT6NmzJ3r27IkzZ86o1QsICMCdO3ekx/r16ytidYiIiOg1oRBCCF12wNPTEy1atMCSJUsAACqVCo6Ojhg3bhymTZtWpH5gYCCysrKwfft2qaxVq1Zwd3fH8uXLATzZE5Seno6tW7dq1afMzEyYmZkhIyMDpqamWrXxPApFmTdJ9MbQ7TdSGVrHDzpRsQaWzwe9tL/fOt0TlJeXhxMnTsDPz08q09PTg5+fH44cOaJxniNHjqjVBwB/f/8i9Q8cOIBq1aqhXr16GD16NO7du1dsP3Jzc5GZman2ICIiojebTkNQamoqCgoKYGtrq1Zua2uLpKQkjfMkJSW9sH5AQAB++uknxMTEYN68efjjjz/QqVMnFBQUaGwzPDwcZmZm0sPR0fEl14yIiIhedZV03YHy0L9/f+n/bm5uaNKkCVxcXHDgwAG0b9++SP3p06cjJCREep6ZmckgRERE9IbT6Z4ga2tr6OvrIzk5Wa08OTkZdnZ2Guexs7MrVX0AqF27NqytrREfH69xulKphKmpqdqDiIiI3mw6DUGGhoZo3rw5YmJipDKVSoWYmBh4eXlpnMfLy0utPgDs3bu32PoAcPPmTdy7dw/29vZl03EiIiJ67en8FPmQkBCsXLkSkZGROH/+PEaPHo2srCwEBwcDAAYPHozp06dL9SdMmIDo6GgsWrQIFy5cwKxZs3D8+HGMHTsWAPDw4UN8+OGHOHr0KBITExETE4MePXrA1dUV/v7+OllHIiIievXofExQYGAg7t69i7CwMCQlJcHd3R3R0dHS4Ofr169DT++/rNa6dWusW7cOn3zyCT766CPUqVMHW7duRePGjQEA+vr6OH36NCIjI5Geng4HBwd07NgRn332GZRKpU7WkYiIiF49Or9O0KuI1wki0p035huJ1wkiKh6vE0RERESkOwxBREREJEsMQURERCRLDEFEREQkSwxBREREJEsMQURERCRLDEFEREQkSwxBREREJEsMQURERCRLDEFEREQkSwxBREREJEsMQURERCRLDEFEREQkSwxBREREJEsMQURERCRLDEFEREQkSwxBREREJEsMQURERCRLDEFEREQkSwxBREREJEsMQURERCRLDEFEREQkSwxBREREJEsMQURERCRLDEFEREQkSwxBREREJEsMQURERCRLDEFEREQkSwxBREREJEsMQURERCRLDEFEREQkSwxBREREJEsMQURERCRLDEFEREQkSwxBREREJEsMQURERCRLDEFEREQkSwxBREREJEsMQURERCRLDEFEREQkSwxBREREJEsMQURERCRLDEFEREQkSwxBREREJEsMQURERCRLDEFEREQkSwxBREREJEsMQURERCRLDEFEREQkSwxBREREJEsMQURERCRLDEFEREQkSwxBREREJEsMQURERCRLr0QIWrp0KZydnWFkZARPT08cO3bsufU3bdqE+vXrw8jICG5ubti5c6fadCEEwsLCYG9vD2NjY/j5+eHy5cvluQpERET0mtF5CNqwYQNCQkIwc+ZMnDx5Ek2bNoW/vz9SUlI01v/zzz8xYMAADBs2DKdOnULPnj3Rs2dPnDlzRqozf/58fP3111i+fDni4uJQpUoV+Pv749GjRxW1WkRERPSKUwghhC474OnpiRYtWmDJkiUAAJVKBUdHR4wbNw7Tpk0rUj8wMBBZWVnYvn27VNaqVSu4u7tj+fLlEELAwcEBkydPRmhoKAAgIyMDtra2WL16Nfr37//CPmVmZsLMzAwZGRkwNTUtozX9j0JR5k0SvTF0+41Uhtbxg05UrIHl80Ev7e93pXLpRQnl5eXhxIkTmD59ulSmp6cHPz8/HDlyROM8R44cQUhIiFqZv78/tm7dCgBISEhAUlIS/Pz8pOlmZmbw9PTEkSNHNIag3Nxc5ObmSs8zMjIAPNmYRFSx3piPXbauO0D0CiunD3rh73ZJ9+/oNASlpqaioKAAtra2auW2tra4cOGCxnmSkpI01k9KSpKmF5YVV+dZ4eHhmD17dpFyR0fHkq0IEZUZMzNd94CIyt2I8v2gP3jwAGYl+DLRaQh6VUyfPl1t75JKpUJaWhqsrKyg4LGrN1pmZiYcHR1x48aNcjn0SUS6x8+5fAgh8ODBAzg4OJSovk5DkLW1NfT19ZGcnKxWnpycDDs7O43z2NnZPbd+4b/Jycmwt7dXq+Pu7q6xTaVSCaVSqVZmbm5emlWh15ypqSm/HInecPycy0NJ9gAV0unZYYaGhmjevDliYmKkMpVKhZiYGHh5eWmcx8vLS60+AOzdu1eqX6tWLdjZ2anVyczMRFxcXLFtEhERkfzo/HBYSEgIhgwZAg8PD7Rs2RIRERHIyspCcHAwAGDw4MGoXr06wsPDAQATJkxAmzZtsGjRInTp0gW//PILjh8/jhUrVgAAFAoFJk6ciDlz5qBOnTqoVasWZsyYAQcHB/Ts2VNXq0lERESvGJ2HoMDAQNy9exdhYWFISkqCu7s7oqOjpYHN169fh57efzusWrdujXXr1uGTTz7BRx99hDp16mDr1q1o3LixVGfKlCnIysrCyJEjkZ6eDm9vb0RHR8PIyKjC149ebUqlEjNnzixyOJSI3hz8nFNxdH6dICIiIiJd0PkVo4mIiIh0gSGIiIiIZIkhiIiIiGSJIYhIg6CgIJ5NSPSGO3DgABQKBdLT0wEAq1ev5jXiZIYhiF55QUFBUCgUUCgUMDQ0hKurKz799FPk5+frumtEVI4KP/ujRo0qMu1///sfFAoFgoKCymx5gYGBuHTpUpm1R68+hiB6LQQEBODOnTu4fPkyJk+ejFmzZmHBggW67hYRlTNHR0f88ssvyMnJkcoePXqEdevWoWbNmmW6LGNjY1SrVq1M26RXG0MQvRaUSiXs7Ozg5OSE0aNHw8/PD7/99hu+/PJLuLm5oUqVKnB0dMSYMWPw8OFDab7C3du7d+9GgwYNYGJiIgWqQgUFBQgJCYG5uTmsrKwwZcqUIncgjo6Ohre3t1Sna9euuHLlijQ9Ly8PY8eOhb29PYyMjODk5CRd4JOItPfWW2/B0dERW7Zskcq2bNmCmjVrolmzZlJZbm4uxo8fj2rVqsHIyAje3t7466+/1NrauXMn6tatC2NjY7zzzjtITExUm/7s4bBZs2bB3d0da9asg
bOzM8zMzNC/f388ePCgXNaVKh5DEL2WjI2NkZeXBz09PXz99dc4e/YsIiMj8fvvv2PKlClqdbOzs7Fw4UKsWbMGBw8exPXr1xEaGipNX7RoEVavXo0ff/wRhw4dQlpaGn799Ve1NrKyshASEoLjx48jJiYGenp6ePfdd6FSqQAAX3/9NX777Tds3LgRFy9exNq1a+Hs7Fzu24FIDoYOHYpVq1ZJz3/88UfprgKFpkyZgqioKERGRuLkyZNwdXWFv78/0tLSAAA3btxAr1690K1bN/z9998YPnw4pk2b9sJlX7lyBVu3bsX27duxfft2/PHHH5g7d27ZriDpjiB6xQ0ZMkT06NFDCCGESqUSe/fuFUqlUoSGhhapu2nTJmFlZSU9X7VqlQAg4uPjpbKlS5cKW1tb6bm9vb2YP3++9Pzx48eiRo0a0jI1uXv3rgAg/v33XyGEEOPGjRPt2rUTKpVK29UkomcUfvZTUlKEUqkUiYmJIjExURgZGYm7d++KHj16iCFDhoiHDx8KAwMDsXbtWmnevLw84eDgIH22p0+fLho2bKjW/tSpUwUAcf/+fSHEk+8LMzMzafrMmTNF5cqVRWZmplT24YcfCk9Pz/JbaapQOr9tBlFJbN++HSYmJnj8+DFUKhUGDhyIWbNmYd++fQgPD8eFCxeQmZmJ/Px8PHr0CNnZ2ahcuTIAoHLlynBxcZHasre3R0pKCgAgIyMDd+7cgaenpzS9UqVK8PDwUDskdvnyZYSFhSEuLg6pqanSHqDr16+jcePGCAoKQocOHVCvXj0EBASga9eu6NixY0VsGqI3no2NDbp06YLVq1dDCIEuXbrA2tpamn7lyhU8fvwYb7/9tlRmYGCAli1b4vz58wCA8+fPq33OAZToptrOzs6oWrWq9Pzp7w96/fFwGL0W3nnnHfz999+4fPkycnJyEBkZibt376Jr165o0qQJoqKicOLECSxduhTAkzE6hQwMDNTaUigURcb8vEi3bt2QlpaGlStXIi4uDnFxcWrLeeutt5CQkIDPPvsMOTk56NevH/r06fMyq0xETxk6dChWr16NyMhIDB06tMKWq+n7o/CPIHr9MQTRa6FKlSpwdXVFzZo1UanSkx2YJ06cgEqlwqJFi9CqVSvUrVsXt2/fLlW7ZmZmsLe3l0INAOTn5+PEiRPS83v37uHixYv45JNP0L59ezRo0AD3798v0papqSkCAwOxcuVKbNiwAVFRUdJ4BCJ6OQEBAcjLy8Pjx4/h7++vNs3FxQWGhoY4fPiwVPb48WP89ddfaNiwIQCgQYMGOHbsmNp8R48eLf+O0yuNh8PoteXq6orHjx/jm2++Qbdu3XD48GEsX7681O1MmDABc+fORZ06dVC/fn18+eWX0sXTAMDCwgJWVlZYsWIF7O3tcf369SIDKr/88kvY29ujWbNm0NPTw6ZNm2BnZ8cLrxGVEX19fenQlr6+vtq0KlWqYPTo0fjwww9haWmJmjVrYv78+cjOzsawYcMAAKNGjcKiRYvw4YcfYvjw4Thx4gRWr15d0atBrxjuCaLXVtOmTfHll19i3rx5aNy4MdauXavVaemTJ0/G+++/jyFDhsDLywtVq1bFu+++K03X09PDL7/8ghMnTqBx48aYNGlSkWsUVa1aFfPnz4eHhwdatGiBxMRE7Ny5E3p6/IgRlRVTU1OYmppqnDZ37lz07t0b77//Pt566y3Ex8dj9+7dsLCwAADUrFkTUVFR2Lp1K5o2bYrly5fjiy++qMju0ytIIUo7OIKIiIjoDcA/U4mIiEiWGIKIiIhIlhiCiIiISJYYgoiIiEiWGIKIiIhIlhiCiIiISJYYgoiIiEiWGIKIiIhIlhiCiIiISJYYgoiIiEiWGIKIiIhIlhiCiIiISJb+H5ewJeXp3LtxAAAAAElFTkSuQmCC\n" + }, + "metadata": {} + } + ] + }, + { + "cell_type": "code", + "source": [ + "df" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 424 + }, + "id": "rArPo4GBO4wu", + "outputId": "5b6f8e96-ff85-4d6a-f07e-f19c73030a85" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " document \\\n", + "0 appeal no. lxvi of 1949.\\nappeal from the high... \n", + "1 civil appeal no.94 of 1949.\\n107 834 appeal fr... \n", + "2 iminal appeal no. 40 of 1951, 127 appeal from ... \n", + "3 appeal no. 388 of 1960.\\nappeal by special lea... \n", + "4 appeal no. 198 of 1954.\\nappeal from the judgm... \n", + "... ... \n", + "30267 article 378a of indian constitution \n", + "30268 article 392 of indian constitution \n", + "30269 article 393 of indian constitution \n", + "30270 article 394 of indian constitution \n", + "30271 article 395 of indian constitution \n", + "\n", + " summary \n", + "0 the charge created in respect of municipal pro... \n", + "1 an agreement for a lease, which a lease is by ... \n", + "2 the question whether a magistrate is \"personal... \n", + "3 the appellant was a member of a joint hindu fa... \n", + "4 the appellant was the ruler of the state of ba... \n", + "... ... \n", + "30267 special provision as to duration of andhra pra... \n", + "30268 power of the president to remove difficulties\\... \n", + "30269 short title this constitution may be called th... \n", + "30270 commencement this article and articles 5, 6, 7... \n", + "30271 repeals the indian independence act, 1947 , an... 
\n", + "\n", + "[11990 rows x 2 columns]" + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
documentsummary
0appeal no. lxvi of 1949.\\nappeal from the high...the charge created in respect of municipal pro...
1civil appeal no.94 of 1949.\\n107 834 appeal fr...an agreement for a lease, which a lease is by ...
2iminal appeal no. 40 of 1951, 127 appeal from ...the question whether a magistrate is \"personal...
3appeal no. 388 of 1960.\\nappeal by special lea...the appellant was a member of a joint hindu fa...
4appeal no. 198 of 1954.\\nappeal from the judgm...the appellant was the ruler of the state of ba...
.........
30267article 378a of indian constitutionspecial provision as to duration of andhra pra...
30268article 392 of indian constitutionpower of the president to remove difficulties\\...
30269article 393 of indian constitutionshort title this constitution may be called th...
30270article 394 of indian constitutioncommencement this article and articles 5, 6, 7...
30271article 395 of indian constitutionrepeals the indian independence act, 1947 , an...
\n", + "

11990 rows × 2 columns

\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "execution_count": 25 + } + ] + }, + { + "cell_type": "markdown", + "source": [], + "metadata": { + "id": "k-iw_1OhPCxG" + } + }, + { + "cell_type": "code", + "source": [ + "!pip install datasets transformers rouge-score nltk -q\n" + ], + "metadata": { + "id": "rHDtBvoYO4tK" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "import torch\n", + "import datasets\n", + "from datasets import Dataset\n", + "from datasets import load_metric" + ], + "metadata": { + "id": "UpbypT6zO4qe" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "from transformers import AutoTokenizer\n", + "\n", + "model_checkpoint ='t5-small'\n", + "\n", + "tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)\n", + "\n", + "pad_on_right = tokenizer.padding_side == \"right\"" + ], + "metadata": { + "id": "e7fvCmmFO4n3" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "max_input_length = 6500\n", + "max_target_length = 500\n" + ], + "metadata": { + "id": "S5ODEDIlO4cl" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "df = df.sample(frac=1).reset_index(drop=True)\n", + "train = df[:3350]\n", + "valid = df[3350:3690]\n", + "test = df[3690:]\n", + "train.shape, valid.shape,test.shape" + ], + "metadata": { + "id": "xqkQF_fuPag8" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "def preprocess_function(examples):\n", + " inputs = ['summarize:' + doc for doc in examples[\"document\"]]\n", + " model_inputs = tokenizer(inputs, max_length=max_input_length, truncation=True,padding='max_length')\n", + "\n", + " # Setup the tokenizer for targets\n", + " with tokenizer.as_target_tokenizer():\n", + " labels = tokenizer(examples[\"summary\"], max_length=max_target_length, truncation=True)\n", + "\n", + " model_inputs[\"labels\"] = labels[\"input_ids\"]\n", + " return model_inputs" + ], + "metadata": { + "id": "4ZsqOiBuPadc" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "import torch\n", + "import datasets\n", + "from datasets import Dataset\n", + "\n", + "train = Dataset.from_pandas(train)\n", + "valid = Dataset.from_pandas(valid)" + ], + "metadata": { + "id": "XHmYkkD7PabB" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "tokenized_train = train.map(preprocess_function, batched=True)\n", + "tokenized_valid = valid.map(preprocess_function, batched=True)" + ], + "metadata": { + "id": "vthXQFMWPaYE" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "finetunning\n" + ], + "metadata": { + "id": "FLWLFhtVP7pY" + } + }, + { + "cell_type": "code", + "source": [ + "import transformers\n", + "assert isinstance(tokenizer, transformers.PreTrainedTokenizerFast)" + ], + "metadata": { + "id": "0c2nE8BpPaVZ" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# from transformers import AutoModelForSeq2SeqLM, DataCollatorForSeq2Seq, Seq2SeqTrainingArguments, Seq2SeqTrainer\n", + "from transformers import AutoModelForSeq2SeqLM, DataCollatorForSeq2Seq, TrainingArguments, Trainer\n", + "\n", + "model = AutoModelForSeq2SeqLM.from_pretrained(model_checkpoint)\n" + ], + "metadata": { + "id": "mMauI_pjPaSo" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + 
"batch_size = 1\n", + "data_collator = DataCollatorForSeq2Seq(tokenizer, model=model)" + ], + "metadata": { + "id": "2uTGqvGePaQA" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "import nltk\n", + "import numpy as np\n", + "\n", + "def compute_metrics(eval_pred):\n", + " predictions, labels = eval_pred\n", + " decoded_preds = tokenizer.batch_decode(predictions, skip_special_tokens=True)\n", + " # Replace -100 in the labels as we can't decode them.\n", + " labels = np.where(labels != -100, labels, tokenizer.pad_token_id)\n", + " decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)\n", + "\n", + " # Rouge expects a newline after each sentence\n", + " decoded_preds = [\"\\n\".join(nltk.sent_tokenize(pred.strip())) for pred in decoded_preds]\n", + " decoded_labels = [\"\\n\".join(nltk.sent_tokenize(label.strip())) for label in decoded_labels]\n", + "\n", + " result = metric.compute(predictions=decoded_preds, references=decoded_labels, use_stemmer=True)\n", + " # Extract a few results\n", + " result = {key: value.mid.fmeasure * 100 for key, value in result.items()}\n", + "\n", + " # Add mean generated length\n", + " prediction_lens = [np.count_nonzero(pred != tokenizer.pad_token_id) for pred in predictions]\n", + " result[\"gen_len\"] = np.mean(prediction_lens)\n", + "\n", + " return {k: round(v, 4) for k, v in result.items()}" + ], + "metadata": { + "id": "xFFVeR4BPaNR" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "import gc\n", + "gc.collect()" + ], + "metadata": { + "id": "-ATGEnFePaKV" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "DEVICE = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n", + "#DEVICE =\"cpu\"\n", + "# print(\"[INFO] training using {}\".format(torch.cuda.get_device_name(0)))\n", + "#print('There are %d GPU(s) available.' 
% torch.cuda.device_count())" + ], + "metadata": { + "id": "jkK2PnnzPaHh" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "torch.cuda.empty_cache()" + ], + "metadata": { + "id": "S79oQ4_fPaEh" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "%env WANDB_DISABLED=True\n" + ], + "metadata": { + "id": "5OiiAhKocomx" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# !pip install accelerate -U" + ], + "metadata": { + "id": "znzvk5zYbmtI" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# !pip install transformers[torch]" + ], + "metadata": { + "id": "wXZfNiSscHty" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# import accelerate\n", + "# print(accelerate.__version__)" + ], + "metadata": { + "id": "H3_m7AgYenQf" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# !pip uninstall torch torchvision -y\n", + "# !pip install torch==1.8.1 torchvision==0.9.1 torchtext==0.9.0\n" + ], + "metadata": { + "id": "08JbVykLk9f9" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# !pip install accelerate\n", + "# !pip install transformers -U\n" + ], + "metadata": { + "id": "4H8U18zZlRjA" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "model_name = model_checkpoint.split(\"/\")[-1]\n", + "args = TrainingArguments(\n", + " output_dir=f\"{model_name}-finetuned-Summarizer\",\n", + " evaluation_strategy = \"epoch\",\n", + " learning_rate=2e-5,\n", + " per_device_train_batch_size=batch_size,\n", + " per_device_eval_batch_size=batch_size,\n", + " weight_decay=0.01,\n", + " save_total_limit=3,\n", + " num_train_epochs=5,\n", + " gradient_accumulation_steps=2,\n", + " fp16=True\n", + ")" + ], + "metadata": { + "id": "KFBs956wPZs6" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "trainer = Trainer(\n", + " model,\n", + " args,\n", + " train_dataset=tokenized_train,\n", + " eval_dataset=tokenized_valid,\n", + " data_collator=data_collator,\n", + " tokenizer=tokenizer,\n", + " compute_metrics=compute_metrics\n", + ")" + ], + "metadata": { + "id": "GF0sl-u6QsxE" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "metric = load_metric(\"rouge\")\n" + ], + "metadata": { + "id": "nEwEBLaVQxsm" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "import os\n", + "os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb=10'" + ], + "metadata": { + "id": "DL0zyq22wdSg" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "trainer.train()" + ], + "metadata": { + "id": "rf3Nt-WRSNWa" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# trainer.save_model(\"\")\n" + ], + "metadata": { + "id": "yJk7o3sitBtE" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "eval_dataset = Dataset.from_pandas(test)" + ], + "metadata": { + "id": "XF222NDeSSba" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "eval_dataset = eval_dataset.map(\n", + " preprocess_function,\n", + " batched=True)" + ], + "metadata": { + "id": "VjyxECrjSWT9" + }, + 
"execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "predict_results = trainer.predict(\n", + " eval_dataset,max_length=128, num_beams=3)" + ], + "metadata": { + "id": "qqnGyisBSZ5H" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "metrics = predict_results.metrics" + ], + "metadata": { + "id": "jYJ1xXR8SdTn" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "if args.predict_with_generate:\n", + " predictions = tokenizer.batch_decode(predict_results.predictions, skip_special_tokens=True, clean_up_tokenization_spaces=True)\n", + " predictions = [pred.strip() for pred in predictions]" + ], + "metadata": { + "id": "xaRK8lyISqth" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "predictions[:2]" + ], + "metadata": { + "id": "kL5cOKd4SuXz" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "test['summary'][:2]" + ], + "metadata": { + "id": "9VLwSVsNSyXO" + }, + "execution_count": null, + "outputs": [] + } + ] +} \ No newline at end of file diff --git a/architects_of_future/IDC notebook/LLM_finetuning.ipynb b/architects_of_future/IDC notebook/LLM_finetuning.ipynb new file mode 100644 index 00000000..4be963c2 --- /dev/null +++ b/architects_of_future/IDC notebook/LLM_finetuning.ipynb @@ -0,0 +1,1210 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "1cc557a5-5a55-4bae-8555-a2b655abfa4a", + "metadata": {}, + "source": [ + "SPDX-License-Identifier: Apache-2.0\n", + "Copyright (c) 2023, Rahul Unnikrishnan Nair \n" + ] + }, + { + "cell_type": "markdown", + "id": "2123be0f-586b-47c9-af17-9d667f28eb3d", + "metadata": {}, + "source": [ + "---\n", + "\n", + "**Text to SQL Generation: Fine-Tuning LLMs with QLoRA on Intel**\n", + "\n", + "👋 Hello and welcome! In this Jupyter Notebook, we will walkthrough the process of fine-tuning a large language model (LLM) to improve its capabilities in generating SQL queries from natural language input. The notebook is suitable for AI engineers and practitioners looking to tune LLMs for specialized tasks such as Text-to-SQL conversions.\n", + "\n", + "**What you will learn with this Notebook**\n", + "\n", + "- 🛠️ Fine-tune a Language Model with either a pre-existing dataset or a custom dataset tailored to your needs on Intel Hw.\n", + "- 💡 Gain insights into the fine-tuning process, including how to manipulate various training parameters to optimize your model's performance.\n", + "- 📊 Test different configurations and observe the results in real-time.\n", + "\n", + "**Hardware Compatibility**\n", + "\n", + "- 🖥️ Designed for 4th Generation Intel® Xeon® Scalable Processors (CPU) and Intel® Data Center GPU Max Series 1100 (XPU)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "67c0e67f-8473-44d6-8065-edfc63a6459d", + "metadata": {}, + "outputs": [], + "source": [ + "!echo \"List of Intel GPUs available on the system:\"\n", + "!xpu-smi discovery 2> /dev/null\n", + "!echo \"Intel Xeon CPU used by this notebook:\"\n", + "!lscpu | grep \"Model name\"" + ] + }, + { + "cell_type": "markdown", + "id": "76293a12-551e-421a-8eb3-06387931d307", + "metadata": { + "jp-MarkdownHeadingCollapsed": true + }, + "source": [ + "---\n", + "\n", + "**Fine-Tuning with QLoRA: Balancing Memory Efficiency and Adaptability**\n", + "\n", + "We leverage the QLoRA methodology for fine-tuning, enabling the loading and refinement of LLMs within the constraints of available GPU memory. Quantized Low Rank Adaptation or QLoRA achieves this by applying a clever combination of weight quantization and adapter-based finetuning.\n", + "\n", + "**How Does QLoRA Work?**\n", + "\n", + "- QLoRA reduces memory footprint via weight quantization. It compresses the pre-trained model weights significantly.\n", + "- During fine-tuning, it focuses on optimizing adapter parameters—low-rank matrices added to the network, tailored for the specific task.\n", + "- This selective training is computationally efficient, targeting a smaller set of trainable parameters.\n", + "\n", + "\n", + "**What is the Big Picture?**\n", + "\n", + "- Think reparameterization: We inject LoRA weights, training only these, not the entire layer, for fine-tuning.\n", + "- This technique is key for task-specific model adaptation.\n", + "- Imagine a hub-and-spoke model for deployment: The hub is the foundational model, and the spokes are task-specific LoRA adapters.\n", + "\n", + "Below, on the left, is an overview of the reparameterization implemented with LoRA (with Quantization). This involves a set of low-rank matrices—think of these as an essential subset of larger weight matrices—trained specifically for the task. On the right, there's a high-level view of a hub-and-spoke model for LLM deployment, where the hub represents the foundational model, and the spokes are the LoRA adapters.\n", + "\n", + "
\n", + " \"lora_adapters_reparameterization\"\n", + "
" + ] + }, + { + "cell_type": "markdown", + "id": "6362a562-f3ce-4dce-9678-ce317e554a04", + "metadata": {}, + "source": [ + "## Initialization\n", + "\n", + "Let's first install and import all the necessary packages required for the fine-tuning process.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9236e9d1-e75e-4089-9a4d-27421521cfd5", + "metadata": { + "jupyter": { + "source_hidden": true + } + }, + "outputs": [], + "source": [ + "import sys\n", + "import site\n", + "from pathlib import Path\n", + "\n", + "!echo \"Installation in progress, please wait...\"\n", + "!{sys.executable} -m pip cache purge > /dev/null\n", + "!{sys.executable} -m pip install --pre bigdl-llm[xpu]==2.4.0b20231116 --no-warn-script-location -f https://developer.intel.com/ipex-whl-stable-xpu > /dev/null\n", + "!{sys.executable} -m pip install peft==0.5.0 --no-deps > /dev/null\n", + "!{sys.executable} -m pip install accelerate==0.23.0 --no-deps --no-warn-script-location > /dev/null\n", + "!{sys.executable} -m pip install transformers==4.34.0 --no-warn-script-location > /dev/null \n", + "!{sys.executable} -m pip install datasets==2.15 --no-warn-script-location > /dev/null 2>&1 \n", + "!{sys.executable} -m pip install fsspec==2023.9.2 > /dev/null 2>&1\n", + "!echo \"Installation completed.\"\n", + "\n", + "def get_python_version():\n", + " return \"python\" + \".\".join(map(str, sys.version_info[:2]))\n", + "\n", + "def set_local_bin_path():\n", + " local_bin = str(Path.home() / \".local\" / \"bin\") \n", + " local_site_packages = str(\n", + " Path.home() / \".local\" / \"lib\" / get_python_version() / \"site-packages\"\n", + " )\n", + " sys.path.append(local_bin)\n", + " sys.path.insert(0, site.getusersitepackages())\n", + " sys.path.insert(0, sys.path.pop(sys.path.index(local_site_packages)))\n", + "\n", + "set_local_bin_path()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "31eb9cf2-abcf-48f8-918b-37a18f85ac7c", + "metadata": { + "jupyter": { + "source_hidden": true + } + }, + "outputs": [], + "source": [ + "import logging\n", + "import os\n", + "import sys\n", + "from math import ceil\n", + "from typing import Optional, Tuple\n", + "import warnings\n", + "\n", + "warnings.filterwarnings(\n", + " \"ignore\", category=UserWarning, module=\"intel_extension_for_pytorch\"\n", + ")\n", + "warnings.filterwarnings(\n", + " \"ignore\", category=UserWarning, module=\"torchvision.io.image\", lineno=13\n", + ")\n", + "warnings.filterwarnings(\"ignore\", message=\"You are using the default legacy behaviour\")\n", + "warnings.filterwarnings(\"ignore\", category=UserWarning, message=\".*Parameter.*\")\n", + "warnings.filterwarnings(\n", + " \"ignore\",\n", + " category=FutureWarning,\n", + " message=\"This implementation of AdamW is deprecated\",\n", + ")\n", + "os.environ[\"TOKENIZERS_PARALLELISM\"] = \"false\"\n", + "os.environ[\"NUMEXPR_MAX_THREADS\"] = \"28\"\n", + "logging.getLogger(\"transformers\").setLevel(logging.ERROR)\n", + "logging.getLogger(\"bigdl\").setLevel(logging.ERROR)\n", + "\n", + "\n", + "import torch\n", + "import intel_extension_for_pytorch as ipex\n", + "from datasets import load_dataset\n", + "from datasets import Dataset\n", + "from bigdl.llm.transformers import AutoModelForCausalLM\n", + "from bigdl.llm.transformers.qlora import (\n", + " get_peft_model,\n", + " prepare_model_for_kbit_training as prepare_model,\n", + ")\n", + "from peft import LoraConfig\n", + "from bigdl.llm.transformers.qlora import PeftModel\n", + "import transformers\n", + "from 
transformers import (\n",
+ "    DataCollatorForSeq2Seq,\n",
+ "    LlamaTokenizer,\n",
+ "    AutoTokenizer,\n",
+ "    Trainer,\n",
+ "    TrainingArguments,\n",
+ ")\n",
+ "\n",
+ "transformers.logging.set_verbosity_error()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "02b08285",
+ "metadata": {},
+ "source": [
+ "---\n",
+ "\n",
+ "**Note on Model Storage Management**\n",
+ "\n",
+ "A set of LLM foundation models is supported out-of-the-box, as listed in the `BASE_MODELS` dictionary below. However, if you're interested in experimenting with additional models, consider the following guidelines:\n",
+ "\n",
+ "- **Storage Quota:** Be mindful of your free storage quota and space requirements for additional models.\n",
+ "- **PEFT Library Support:** For models supported by `peft`, refer to the [PEFT repository](https://github.com/huggingface/peft/blob/main/src/peft/utils/other.py#L434) for predefined LoRA target modules.\n",
+ "- **Custom Models:** For non-`peft` models, manually configure LoRA target modules in `LoraConfig`. Example for llama models: `[\"q_proj\", \"k_proj\", \"v_proj\", \"o_proj\"]`.\n",
+ "- **Disk Space Management:** Check disk space with the provided Python function. Delete the cache to free space, but note that this requires re-downloading models later.\n",
+ "- **Reset Model Cache Path:** Update `MODEL_CACHE_PATH = \"~/\"` in the **Model Configuration** cell.\n",
+ "\n",
+ "---\n",
+ "\n",
+ "**Python Function to Check Disk Space**\n",
+ "\n",
+ "```python\n",
+ "# Function to check available disk space in the Hugging Face cache directory\n",
+ "import os\n",
+ "import shutil\n",
+ "\n",
+ "def check_disk_space(path=\"~/.cache/huggingface/\"):\n",
+ "    abs_path = os.path.expanduser(path)\n",
+ "    total, used, free = shutil.disk_usage(abs_path)\n",
+ "    print(f\"Total: {total // (2**30)} GiB\")\n",
+ "    print(f\"Used: {used // (2**30)} GiB\")\n",
+ "    print(f\"Free: {free // (2**30)} GiB\")\n",
+ "\n",
+ "# Example usage\n",
+ "check_disk_space()\n",
+ "```\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "131864cb-ce5d-405b-8886-5d0f1f487c30",
+ "metadata": {},
+ "source": [
+ "---\n",
+ "\n",
+ "**Tailoring Your Model Configuration**\n",
+ "\n",
+ "Dive into the customization core of LLM fine-tuning, equipped with a diverse range of base models to suit unique goals.\n",
+ "\n",
+ "- **Model Choices in `BASE_MODELS`**: \n",
+ "  - Options range from the nimble `open_llama_3b_v2` to the more expansive `Llama-2-13b-hf`, plus specialized variants like `CodeLlama-7b-hf`.\n",
+ "  - Feel free to switch between these models to discover which one aligns best with your objectives.\n",
+ "\n",
+ "- **Dataset**:\n",
+ "  - Using `b-mc2/sql-create-context` from Huggingface datasets, a set of 78,577 examples (natural language queries, SQL statements).\n",
+ "  - Ideal for text-to-SQL models. Dataset details [here](https://huggingface.co/datasets/b-mc2/sql-create-context).\n",
+ "\n",
+ "- **LoRA Parameters - Your Knobs to Turn**:\n",
+ "  - `r` (Rank): This is a key factor in how finely your model can adapt. 
A higher rank can grasp more\n",
+ "    complex nuances, while a lower rank ensures a leaner memory footprint.\n",
+ "  - `lora_alpha` (Scaling Factor): Adjusts the impact of the LoRA adapters, balancing adaptation to the new task against\n",
+ "    the integrity of the pre-trained weights.\n",
+ "  - `target_modules`: You decide which parts of the transformer model to enhance with LoRA adapters,\n",
+ "    directly impacting how your model interprets and generates language.\n",
+ "  - `lora_dropout`: Controls overfitting; experiment for optimal generalization.\n",
+ "  - `bias`: Modify to observe learning dynamic changes.\n",
+ "\n",
+ "This notebook is set to start with `CodeLlama-7b-hf` as the default model, as our task is to generate code. To use models like Llama 2, you will have to accept the usage policy as stipulated [here](https://ai.meta.com/llama/use-policy/).\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "88077cc2-8fcf-4128-ac44-9fb2bb327398",
+ "metadata": {
+ "jupyter": {
+ "source_hidden": true
+ }
+ },
+ "outputs": [],
+ "source": [
+ "BASE_MODELS = {\n",
+ "    \"0\": \"NousResearch/Nous-Hermes-Llama-2-7b\",  # https://huggingface.co/NousResearch/Nous-Hermes-llama-2-7b\n",
+ "    \"1\": \"NousResearch/Llama-2-7b-chat-hf\",  # https://huggingface.co/NousResearch/Llama-2-7b-chat-hf\n",
+ "    \"2\": \"NousResearch/Llama-2-13b-hf\",  # https://huggingface.co/NousResearch/Llama-2-13b-hf\n",
+ "    \"3\": \"NousResearch/CodeLlama-7b-hf\",  # https://huggingface.co/NousResearch/CodeLlama-7b-hf\n",
+ "    \"4\": \"Phind/Phind-CodeLlama-34B-v2\",  # https://huggingface.co/Phind/Phind-CodeLlama-34B-v2\n",
+ "    \"5\": \"openlm-research/open_llama_3b_v2\",  # https://huggingface.co/openlm-research/open_llama_3b_v2\n",
+ "    \"6\": \"openlm-research/open_llama_13b\",  # https://huggingface.co/openlm-research/open_llama_13b\n",
+ "    \"7\": \"HuggingFaceH4/zephyr-7b-beta\",  # https://huggingface.co/HuggingFaceH4/zephyr-7b-beta\n",
+ "}\n",
+ "BASE_MODEL = BASE_MODELS[\"3\"]\n",
+ "DATA_PATH = \"b-mc2/sql-create-context\"\n",
+ "MODEL_PATH = \"./final_model\"\n",
+ "ADAPTER_PATH = \"./lora_adapters\"\n",
+ "DEVICE = torch.device(\"xpu\" if torch.xpu.is_available() else \"cpu\")\n",
+ "LORA_CONFIG = LoraConfig(\n",
+ "    r=16,  # rank\n",
+ "    lora_alpha=32,  # scaling factor\n",
+ "    target_modules=[\"q_proj\", \"k_proj\", \"v_proj\"], \n",
+ "    lora_dropout=0.05,\n",
+ "    bias=\"none\",\n",
+ "    task_type=\"CAUSAL_LM\",\n",
+ ")\n",
+ "MODEL_CACHE_PATH = \"/home/common/data/Big_Data/GenAI/llm_models\"\n",
+ "\n",
+ "print(\"=\" * 80)\n",
+ "print(f\"Using Device: {DEVICE}\")\n",
+ "print(f\"Final model will be saved to: {MODEL_PATH}\")\n",
+ "print(f\"LoRA adapters will be saved to: {ADAPTER_PATH}\")\n",
+ "print(f\"Finetuning Model: {BASE_MODEL}\")\n",
+ "print(f\"Using dataset from: {DATA_PATH}\")\n",
+ "print(f\"Model cache: {MODEL_CACHE_PATH}\")\n",
+ "print(\"=\" * 80)"
+ ]
+ },
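+ {
+ "cell_type": "markdown",
+ "id": "lora-rank-intuition",
+ "metadata": {},
+ "source": [
+ "To build intuition for the `r` knob above, here is a rough back-of-the-envelope sketch. This is our own illustration with a hypothetical 4096×4096 projection layer, not a measurement of any model in `BASE_MODELS`: a LoRA adapter for a `d × d` weight matrix adds `2 × d × r` trainable parameters, and `lora_alpha / r` scales their contribution at merge time.\n",
+ "\n",
+ "```python\n",
+ "# Illustration only: hypothetical d=4096 projection, r=16 as in LORA_CONFIG above\n",
+ "d, r = 4096, 16\n",
+ "lora_params = 2 * d * r  # A (r x d) plus B (d x r)\n",
+ "full_params = d * d\n",
+ "print(f\"{lora_params} vs {full_params} ({100 * lora_params / full_params:.2f}% of the layer)\")\n",
+ "```\n"
+ ]
+ },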
+ {
+ "cell_type": "markdown",
+ "id": "d797b621-fd06-4ae9-a883-d7d15f16d6c4",
+ "metadata": {},
+ "source": [
+ "---\n",
+ "\n",
+ "**Prompt Engineering for Text-to-SQL Conversion**\n",
+ "\n",
+ "In the realm of fine-tuning language models for specialized tasks, the design of the prompt is pivotal. The function `generate_prompt_sql` encapsulates the input question, the relevant database context, and the expected output in a structured and concise manner.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "d8ecd7df-b7ce-48b0-ba9f-04b7ec0c1cf4",
+ "metadata": {
+ "jupyter": {
+ "source_hidden": true
+ }
+ },
+ "outputs": [],
+ "source": [
+ "def generate_prompt_sql(input_question, context, output=\"\"):\n",
+ "    \"\"\"\n",
+ "    Generates a prompt for fine-tuning the LLM model for text-to-SQL tasks.\n",
+ "\n",
+ "    Parameters:\n",
+ "        input_question (str): The input text or question to be converted to SQL.\n",
+ "        context (str): The schema or context in which the SQL query operates.\n",
+ "        output (str, optional): The expected SQL query as the output.\n",
+ "\n",
+ "    Returns:\n",
+ "        str: A formatted string serving as the prompt for the fine-tuning task.\n",
+ "    \"\"\"\n",
+ "    return f\"\"\"You are a powerful text-to-SQL model. Your job is to answer questions about a database. You are given a question and context regarding one or more tables. \n",
+ "\n",
+ "You must output the SQL query that answers the question.\n",
+ "\n",
+ "### Input:\n",
+ "{input_question}\n",
+ "\n",
+ "### Context:\n",
+ "{context}\n",
+ "\n",
+ "### Response:\n",
+ "{output}\"\"\""
+ ]
+ },
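+ {
+ "cell_type": "markdown",
+ "id": "prompt-example",
+ "metadata": {},
+ "source": [
+ "As a quick sanity check (our own example, reusing one of the made-up samples that appears later in this notebook), calling the function above like this:\n",
+ "\n",
+ "```python\n",
+ "print(generate_prompt_sql(\n",
+ "    \"How many goals did player John Smith score last season?\",\n",
+ "    \"CREATE TABLE player_stats (player_name VARCHAR, goals_scored INT, season VARCHAR)\",\n",
+ "))\n",
+ "```\n",
+ "\n",
+ "prints the instruction-style prompt with an empty `### Response:` section, which is exactly what the model learns to complete during fine-tuning.\n"
+ ]
+ },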
+ {
+ "cell_type": "markdown",
+ "id": "9dd863cc",
+ "metadata": {},
+ "source": [
+ "---\n",
+ "\n",
+ "**Model Loading and Configuration**\n",
+ "\n",
+ "Initializing the `FineTuner`, we load the base model using `base_model_id`. Key to this setup is the `load_in_low_bit` option, using the [BigDL library](https://bigdl.readthedocs.io/en/latest/) to load the model in a 4-bit format (\"nf4\"). This approach significantly cuts down on memory. Additionally, we configure the LoRA adapters for mixed-precision training with `torch.float16`.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "aeab82dc",
+ "metadata": {
+ "jupyter": {
+ "source_hidden": true
+ }
+ },
+ "outputs": [],
+ "source": [
+ "def setup_model_and_tokenizer(base_model_id: str):\n",
+ "    \"\"\"Downloads / Loads the pre-trained model in NF4 datatype and tokenizer based on the given base model ID for training.\"\"\"\n",
+ "    local_model_id = base_model_id.replace(\"/\", \"--\")\n",
+ "    local_model_path = os.path.join(MODEL_CACHE_PATH, local_model_id)\n",
+ "    print(f\"local model path is: {local_model_path}\")\n",
+ "\n",
+ "    try:\n",
+ "        model = AutoModelForCausalLM.from_pretrained(\n",
+ "            local_model_path,\n",
+ "            load_in_low_bit=\"nf4\",\n",
+ "            optimize_model=False,\n",
+ "            torch_dtype=torch.float16,\n",
+ "            modules_to_not_convert=[\"lm_head\"],\n",
+ "        )\n",
+ "    except OSError:\n",
+ "        logging.info(\n",
+ "            f\"Model not found locally. Downloading {base_model_id} to cache...\"\n",
+ "        )\n",
+ "        model = AutoModelForCausalLM.from_pretrained(\n",
+ "            base_model_id,\n",
+ "            load_in_low_bit=\"nf4\",\n",
+ "            optimize_model=False,\n",
+ "            torch_dtype=torch.float16,\n",
+ "            modules_to_not_convert=[\"lm_head\"],\n",
+ "        )\n",
+ "\n",
+ "    try:\n",
+ "        if \"llama\" in base_model_id.lower():\n",
+ "            tokenizer = LlamaTokenizer.from_pretrained(local_model_path)\n",
+ "        else:\n",
+ "            tokenizer = AutoTokenizer.from_pretrained(local_model_path)\n",
+ "    except OSError:\n",
+ "        logging.info(\n",
+ "            f\"Tokenizer not found locally. Downloading tokenizer for {base_model_id} to cache...\"\n",
+ "        )\n",
+ "        if \"llama\" in base_model_id.lower():\n",
+ "            tokenizer = LlamaTokenizer.from_pretrained(base_model_id)\n",
+ "        else:\n",
+ "            tokenizer = AutoTokenizer.from_pretrained(base_model_id)\n",
+ "    tokenizer.pad_token_id = 0\n",
+ "    tokenizer.padding_side = \"left\"\n",
+ "    return model, tokenizer"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "3777656b-74f4-4ebc-b741-a5d50bc6e79a",
+ "metadata": {},
+ "source": [
+ "---\n",
+ "\n",
+ "**FineTuner**\n",
+ "\n",
+ "The `FineTuner` class encapsulates the entire process of fine-tuning LLMs for tasks such as text-to-SQL conversion.\n",
+ "\n",
+ "\n",
+ "**Tokenization Strategy**\n",
+ "\n",
+ "The tokenization process is tailored to the type of model being fine-tuned. For instance, if we are working with a Llama model, we utilize a `LlamaTokenizer` to ensure compatibility with the model's expected input format. For other models, a generic `AutoTokenizer` is used. We configure the tokenizer to pad from the left side (`padding_side=\"left\"`) and set the pad token ID to 0.\n",
+ "\n",
+ "**Data Tokenization and Preparation**\n",
+ "\n",
+ "The `tokenize_data` method is where the fine-tuner ingests raw text data and converts it into a format suitable for training the model. This method handles the addition of end-of-sequence tokens, truncation to a specified `cutoff_len`, and conditioning on the input for training.\n",
+ "\n",
+ "**Dataset Handling**\n",
+ "\n",
+ "`prepare_data` manages the splitting of data into training and validation sets, applying the `tokenize_data` transformation to each entry. This ensures that our datasets are ready for input into the model, with all necessary tokenization applied.\n",
+ "\n",
+ "**Training Process**\n",
+ "\n",
+ "Finally, the `train_model` method orchestrates the training process, setting up the `Trainer` with the correct datasets, training arguments, and data collator. The fine-tuning process is encapsulated within the `finetune` method, which strings together all the previous steps into a coherent pipeline, from model setup to training execution.\n",
+ "\n",
+ "**Using QLoRA for Efficient Fine-Tuning**\n",
+ "1. Load a pretrained model (e.g., LLaMA) in low precision with `load_in_low_bit=\"nf4\"` for 4-bit quantized weights.\n",
+ "2. Prepare the quantized model with `prepare_model(model)`, handling weight quantization.\n",
+ "3. Add LoRA adapters via `get_peft_model(model, config)` for setting adapter parameters.\n",
+ "4. Fine-tune with `Trainer`, focusing gradients on adapters while keeping base model weights fixed.\n",
+ "\n",
+ "**Code Implementation**\n",
+ "- Model loading with BigDL's `AutoModelForCausalLM`, initializing in 4-bit using `load_in_low_bit=\"nf4\"`.\n",
+ "- `prepare_model()` quantizes the model weights.\n",
+ "- `get_peft_model()` adds LoRA adapters.\n",
+ "- `Trainer` handles fine-tuning, optimizing only adapter weights.\n",
+ "\n",
+ "\n",
+ "So in summary, we leverage QLoRA in BigDL to load the base LLM in low precision, inject adapters with `peft`, and efficiently finetune by optimizing just the adapters end-to-end while keeping the base model fixed. This unlocks huge memory savings, allowing us to adapt giant models.\n",
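+ "\n",
+ "The four steps above map to roughly the following condensed sketch (simplified from the `FineTuner` class in the next cell and reusing `BASE_MODEL` and `LORA_CONFIG` from earlier; it is an outline, not a separate runnable cell):\n",
+ "\n",
+ "```python\n",
+ "model = AutoModelForCausalLM.from_pretrained(\n",
+ "    BASE_MODEL, load_in_low_bit=\"nf4\", torch_dtype=torch.float16\n",
+ ")                                           # 1. load quantized base weights\n",
+ "model = prepare_model(model)                # 2. prepare quantized weights for training\n",
+ "model = get_peft_model(model, LORA_CONFIG)  # 3. inject trainable LoRA adapters\n",
+ "# 4. Trainer(...) then updates only the adapter weights\n",
+ "```"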
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4d8f4cb8-0da5-4572-bd07-bdae1db897a3", + "metadata": { + "jupyter": { + "source_hidden": true + } + }, + "outputs": [], + "source": [ + "class FineTuner:\n", + " \"\"\"A class to handle the fine-tuning of LLM models.\"\"\"\n", + "\n", + " def __init__(self, base_model_id: str, model_path: str, device: torch.device):\n", + " \"\"\"\n", + " Initialize the FineTuner with base model, model path, and device.\n", + "\n", + " Parameters:\n", + " base_model_id (str): Id of pre-trained model to use for fine-tuning.\n", + " model_path (str): Path to save the fine-tuned model.\n", + " device (torch.device): Device to run the model on.\n", + " \"\"\"\n", + " self.base_model_id = base_model_id\n", + " self.model_path = model_path\n", + " self.device = device\n", + " self.model, self.tokenizer = setup_model_and_tokenizer(base_model_id)\n", + "\n", + "\n", + " def tokenize_data(\n", + " self, data_points, add_eos_token=True, train_on_inputs=False, cutoff_len=512\n", + " ) -> dict:\n", + " \"\"\"\n", + " Tokenizes dataset of SQL related data points consisting of questions, context, and answers.\n", + "\n", + " Parameters:\n", + " data_points (dict): A batch from the dataset containing 'question', 'context', and 'answer'.\n", + " add_eos_token (bool): Whether to add an EOS token at the end of each tokenized sequence.\n", + " cutoff_len (int): The maximum length for each tokenized sequence.\n", + "\n", + " Returns:\n", + " dict: A dictionary containing tokenized 'input_ids', 'attention_mask', and 'labels'.\n", + " \"\"\"\n", + " try:\n", + " question = data_points[\"question\"]\n", + " context = data_points[\"context\"]\n", + " answer = data_points[\"answer\"]\n", + " if train_on_inputs:\n", + " user_prompt = generate_prompt_sql(question, context)\n", + " tokenized_user_prompt = self.tokenizer(\n", + " user_prompt,\n", + " truncation=True,\n", + " max_length=cutoff_len,\n", + " padding=False,\n", + " return_tensors=None,\n", + " )\n", + " user_prompt_len = len(tokenized_user_prompt[\"input_ids\"])\n", + " if add_eos_token:\n", + " user_prompt_len -= 1\n", + "\n", + " combined_text = generate_prompt_sql(question, context, answer)\n", + " tokenized = self.tokenizer(\n", + " combined_text,\n", + " truncation=True,\n", + " max_length=cutoff_len,\n", + " padding=False,\n", + " return_tensors=None,\n", + " )\n", + " if (\n", + " tokenized[\"input_ids\"][-1] != self.tokenizer.eos_token_id\n", + " and add_eos_token\n", + " and len(tokenized[\"input_ids\"]) < cutoff_len\n", + " ):\n", + " tokenized[\"input_ids\"].append(self.tokenizer.eos_token_id)\n", + " tokenized[\"attention_mask\"].append(1)\n", + " tokenized[\"labels\"] = tokenized[\"input_ids\"].copy()\n", + " if train_on_inputs:\n", + " tokenized[\"labels\"] = [-100] * user_prompt_len + tokenized[\"labels\"][\n", + " user_prompt_len:\n", + " ]\n", + "\n", + " return tokenized\n", + " except Exception as e:\n", + " logging.error(\n", + " f\"Error in batch tokenization: {e}, Line: {e.__traceback__.tb_lineno}\"\n", + " )\n", + " raise e\n", + "\n", + " def prepare_data(self, data, val_set_size=100) -> Dataset:\n", + " \"\"\"Prepare training and validation datasets.\"\"\"\n", + " try:\n", + " train_val_split = data[\"train\"].train_test_split(\n", + " test_size=val_set_size, shuffle=True, seed=42\n", + " )\n", + " train_data = train_val_split[\"train\"].shuffle().map(self.tokenize_data)\n", + " val_data = train_val_split[\"test\"].shuffle().map(self.tokenize_data)\n", + " return train_data, 
val_data\n",
+ "        except Exception as e:\n",
+ "            logging.error(\n",
+ "                f\"Error in preparing data: {e}, Line: {e.__traceback__.tb_lineno}\"\n",
+ "            )\n",
+ "            raise e\n",
+ "\n",
+ "    def train_model(self, train_data, val_data, training_args):\n",
+ "        \"\"\"\n",
+ "        Fine-tune the model with the given training and validation data.\n",
+ "\n",
+ "        Parameters:\n",
+ "            train_data (Dataset): Training data.\n",
+ "            val_data (Optional[Dataset]): Validation data.\n",
+ "            training_args (TrainingArguments): Training configuration.\n",
+ "        \"\"\"\n",
+ "        try:\n",
+ "            self.model = self.model.to(self.device)\n",
+ "            self.model = prepare_model(self.model)\n",
+ "            self.model = get_peft_model(self.model, LORA_CONFIG)\n",
+ "            trainer = Trainer(\n",
+ "                model=self.model,\n",
+ "                train_dataset=train_data,\n",
+ "                eval_dataset=val_data,\n",
+ "                args=training_args,\n",
+ "                data_collator=DataCollatorForSeq2Seq(\n",
+ "                    self.tokenizer,\n",
+ "                    pad_to_multiple_of=8,\n",
+ "                    return_tensors=\"pt\",\n",
+ "                    padding=True,\n",
+ "                ),\n",
+ "            )\n",
+ "            self.model.config.use_cache = False\n",
+ "            trainer.train()\n",
+ "            self.model.save_pretrained(self.model_path)\n",
+ "        except Exception as e:\n",
+ "            logging.error(f\"Error in model training: {e}\")\n",
+ "\n",
+ "    def finetune(self, data_path, training_args):\n",
+ "        \"\"\"\n",
+ "        Execute the fine-tuning pipeline.\n",
+ "\n",
+ "        Parameters:\n",
+ "            data_path (str): Path to the data for fine-tuning.\n",
+ "            training_args (TrainingArguments): Training configuration.\n",
+ "        \"\"\"\n",
+ "        try:\n",
+ "            data = load_dataset(data_path)\n",
+ "            train_data, val_data = self.prepare_data(data)\n",
+ "            self.train_model(train_data, val_data, training_args)\n",
+ "        except KeyboardInterrupt:\n",
+ "            print(\"Interrupt received, saving model...\")\n",
+ "            self.model.save_pretrained(f\"{self.model_path}_interrupted\")\n",
+ "            print(f\"Model saved to {self.model_path}_interrupted\")\n",
+ "        except Exception as e:\n",
+ "            logging.error(f\"Error in finetuning: {e}\")"
+ ]
+ },
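+ {
+ "cell_type": "markdown",
+ "id": "label-masking-note",
+ "metadata": {},
+ "source": [
+ "One detail of `tokenize_data` worth calling out: with `train_on_inputs=True`, the prompt tokens are masked out of the loss with `-100` so that only the SQL answer is learned. A minimal illustration with made-up token IDs (not from a real batch):\n",
+ "\n",
+ "```python\n",
+ "# Hypothetical example: 5 prompt tokens followed by 3 answer tokens\n",
+ "input_ids = [15, 8, 42, 7, 99, 311, 12, 2]\n",
+ "user_prompt_len = 5\n",
+ "labels = [-100] * user_prompt_len + input_ids[user_prompt_len:]\n",
+ "print(labels)  # [-100, -100, -100, -100, -100, 311, 12, 2] -> loss only on the answer\n",
+ "```\n"
+ ]
+ },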
+ {
+ "cell_type": "markdown",
+ "id": "34af4187-d362-49b8-bdbc-e8d2a7ae0ac0",
+ "metadata": {},
+ "source": [
+ "---\n",
+ "**Fine-Tuning the Model**\n",
+ "\n",
+ "The `lets_finetune` function orchestrates the fine-tuning process, offering a customizable interface for training. It enables specification of the device, model, batch size, warm-up steps, learning rate, and maximum training steps.\n",
+ "\n",
+ "\n",
+ "**Some of the key Training Parameters:**\n",
+ "- `per_device_batch_size`: Number of samples per batch on each XPU.\n",
+ "- `gradient_accumulation_steps`: Enables larger effective batch sizes (see the worked example after this cell).\n",
+ "- `warmup_steps`: Stabilizes training dynamics at the start.\n",
+ "- `save_steps`: Determines checkpoint frequency.\n",
+ "- `max_steps`: Limits training iterations; start with a high number like 1000 or 2000 (the default here is `200`).\n",
+ "- `learning_rate`: Balances convergence speed and training stability.\n",
+ "- `max_grad_norm`: Clips gradients to avoid excessively large values.\n",
+ "\n",
+ "**Monitoring and Interruption**\n",
+ "- Monitor training/validation loss to identify the optimal stopping point.\n",
+ "- Interrupt training in Jupyter via `Kernel -> Interrupt Kernel` if performance is satisfactory before `max_steps`.\n",
+ "- The latest checkpoint is saved in `./final_model_interrupted`; the last saved adapter checkpoint in `./lora_adapters`.\n",
+ "\n",
+ "This setup allows for efficient and flexible model fine-tuning, adaptable to varying project needs and computational constraints.\n",
+ "\n",
+ "---\n"
+ ]
+ },
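+ {
+ "cell_type": "markdown",
+ "id": "effective-batch-size",
+ "metadata": {},
+ "source": [
+ "As a quick worked example of how the two batch knobs combine (using the defaults of the `lets_finetune` function below):\n",
+ "\n",
+ "```python\n",
+ "per_device_batch_size, gradient_accum_steps = 4, 4  # defaults of lets_finetune below\n",
+ "print(per_device_batch_size * gradient_accum_steps)  # 16 samples per optimizer update\n",
+ "```\n"
+ ]
+ },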
steps:\", gradient_accum_steps))\n", + " print(param_format.format(\"Warmup steps:\", warmup_steps))\n", + " print(param_format.format(\"Save steps:\", save_steps))\n", + " print(param_format.format(\"Evaluation steps:\", eval_steps))\n", + " print(param_format.format(\"Max steps:\", max_steps))\n", + " print(param_format.format(\"Learning rate:\", learning_rate))\n", + " print(param_format.format(\"Max gradient norm:\", max_grad_norm))\n", + " print(param_format.format(\"Save total limit:\", save_total_limit))\n", + " print(param_format.format(\"Logging steps:\", logging_steps))\n", + " print(\"\\033[1;34m\" + \"=\" * 60 + \"\\033[0m\\n\")\n", + "\n", + " # Initialize the finetuner with the model and device information\n", + " finetuner = FineTuner(\n", + " base_model_id=model, model_path=MODEL_PATH, device=device\n", + " )\n", + "\n", + " training_args = TrainingArguments(\n", + " per_device_train_batch_size=per_device_batch_size,\n", + " gradient_accumulation_steps=gradient_accum_steps,\n", + " warmup_steps=warmup_steps,\n", + " save_steps=save_steps,\n", + " save_strategy=\"steps\",\n", + " eval_steps=eval_steps,\n", + " evaluation_strategy=\"steps\",\n", + " max_steps=max_steps,\n", + " learning_rate=learning_rate,\n", + " #max_grad_norm=max_grad_norm,\n", + " bf16=True,\n", + " #lr_scheduler_type=\"cosine\",\n", + " load_best_model_at_end=True,\n", + " ddp_find_unused_parameters=False,\n", + " group_by_length=True,\n", + " save_total_limit=save_total_limit,\n", + " logging_steps=logging_steps,\n", + " optim=\"adamw_hf\",\n", + " output_dir=\"./lora_adapters\",\n", + " logging_dir=\"./logs\",\n", + " report_to=\"wandb\" if ENABLE_WANDB else [],\n", + " )\n", + "\n", + " # Start fine-tuning\n", + " finetuner.finetune(DATA_PATH, training_args)\n", + " except Exception as e:\n", + " logging.error(f\"Error occurred: {e}\")" + ] + }, + { + "cell_type": "markdown", + "id": "5ddc5dee-498b-498a-9ad8-d03f569e9e9a", + "metadata": {}, + "source": [ + "We can optionally use Weights & Biases to track our training metrics, uncomment the below cell to enable `wandb`. You will need to pass in your API key when prompted. You can ofcourse skip this step if you'd like to.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f1a31697-8bf5-4ec9-9e13-bb7095218fcd", + "metadata": { + "jupyter": { + "source_hidden": true + } + }, + "outputs": [], + "source": [ + "#print(\"installing wandb...\")\n", + "#!{sys.executable} -m pip install -U --force wandb==0.15.12 > /dev/null 2>&1\n", + "#print(\"installation complete...\")\n", + "#import wandb\n", + "\n", + "#os.environ[\"WANDB_PROJECT\"] = f\"text-to-sql-finetune-model-name_{BASE_MODEL.replace('/', '_')}\"\n", + "#wandb.login()\n", + "\n", + "#ENABLE_WANDB = True" + ] + }, + { + "cell_type": "markdown", + "id": "c497cbaf-994b-474b-92f1-fb33fca6b81f", + "metadata": {}, + "source": [ + "\n", + "---\n", + "\n", + "**Let's Finetune!**\n", + "\n", + "Now it's time to actually fine-tune the model. The `lets_finetune` function below takes care of this. It initializes a FineTuner object with the configurations you've set or left as default." 
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "1fa45c6d-fac1-4331-8dd5-3ebf9cceed6d",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "lets_finetune()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "39613af7-3408-4412-8ded-aa6116759f06",
+ "metadata": {},
+ "source": [
+ "\n",
+ "**Testing our Fine-Tuned LLM**\n",
+ "\n",
+ "Congratulations on successfully fine-tuning your Language Model for Text-to-SQL tasks! It's now time to put the model to the test.\n",
+ "\n",
+ "___"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "00cb638e-1464-4596-8809-186834b3f277",
+ "metadata": {},
+ "source": [
+ "**TextToSQLGenerator: Generating SQL Queries from Text Prompts**\n",
+ "\n",
+ "**Important Note**: Remember to re-import necessary packages and re-define `BASE_MODELS` by rerunning relevant cells if the Jupyter kernel is restarted.\n",
+ "\n",
+ "**Overview of `TextToSQLGenerator`**\n",
+ "- Designed for generating SQL queries from natural language prompts.\n",
+ "- Allows model selection at initialization.\n",
+ "\n",
+ "**Initialization and Configuration:**\n",
+ "- Set `use_adapter` to `True` for using the fine-tuned LoRA model; defaults to the base model otherwise.\n",
+ "- Automatic tokenizer selection based on the model ID, with special handling for 'llama' models.\n",
+ "- Optimized loading for CPU / XPUs (`low_cpu_mem_usage`, `load_in_4bit`).\n",
+ "- For LoRA models, loads fine-tuned checkpoints for inference.\n",
+ "\n",
+ "**Generating SQL Queries:**\n",
+ "\n",
+ "The `generate` method is where the actual translation occurs. Given a text prompt, the method encodes the prompt using the tokenizer, ensuring that it fits within the model's maximum length constraints. It then performs inference to generate the SQL query.\n",
+ "\n",
+ "The method exposes parameters like `temperature` and `repetition_penalty`, which we can tweak to control the creativity and quality of the generated queries!"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "10a0e504-46b7-4c6e-8d80-b28798607ed6",
+ "metadata": {
+ "jupyter": {
+ "source_hidden": true
+ }
+ },
+ "outputs": [],
+ "source": [
+ "os.environ[\"WANDB_DISABLED\"] = \"true\"\n",
+ "INFERENCE_DEVICE = torch.device(\"cpu\")  # change this to `xpu` to use Intel GPU for inference \n",
+ "\n",
+ "def generate_prompt_sql(input_question, context, output=\"\"):\n",
+ "    \"\"\"\n",
+ "    Generates a prompt for fine-tuning the LLM model for text-to-SQL tasks.\n",
+ "\n",
+ "    Parameters:\n",
+ "        input_question (str): The input text or question to be converted to SQL.\n",
+ "        context (str): The schema or context in which the SQL query operates.\n",
+ "        output (str, optional): The expected SQL query as the output.\n",
+ "\n",
+ "    Returns:\n",
+ "        str: A formatted string serving as the prompt for the fine-tuning task.\n",
+ "    \"\"\"\n",
+ "    return f\"\"\"You are a powerful text-to-SQL model. Your job is to answer questions about a database. You are given a question and context regarding one or more tables. 
\n", + "\n", + "You must output the SQL query that answers the question.\n", + "\n", + "### Input:\n", + "{input_question}\n", + "\n", + "### Context:\n", + "{context}\n", + "\n", + "### Response:\n", + "{output}\"\"\"\n", + "\n", + "\n", + "def setup_model_and_tokenizer(base_model_id: str):\n", + " \"\"\"Downloads / Load the pre-trained model in 4bit and tokenizer based on the given base model ID for inference.\"\"\"\n", + " local_model_id = base_model_id.replace(\"/\", \"--\")\n", + " local_model_path = os.path.join(MODEL_CACHE_PATH, local_model_id)\n", + " print(f\"local model path is: {local_model_path}\")\n", + "\n", + " try:\n", + " model = AutoModelForCausalLM.from_pretrained(\n", + " local_model_path,\n", + " load_in_4bit=True,\n", + " optimize_model=True,\n", + " use_cache=True,\n", + " torch_dtype=torch.float16,\n", + " modules_to_not_convert=[\"lm_head\"],\n", + " )\n", + " except OSError:\n", + " logging.info(\n", + " f\"Model not found locally. Downloading {base_model_id} to cache...\"\n", + " )\n", + " model = AutoModelForCausalLM.from_pretrained(\n", + " local_model_path,\n", + " load_in_4bit=True,\n", + " optimize_model=True,\n", + " use_cache=True,\n", + " torch_dtype=torch.float16,\n", + " modules_to_not_convert=[\"lm_head\"],\n", + " )\n", + "\n", + " try:\n", + " if \"llama\" in base_model_id.lower():\n", + " tokenizer = LlamaTokenizer.from_pretrained(local_model_path)\n", + " else:\n", + " tokenizer = AutoTokenizer.from_pretrained(local_model_path)\n", + " except OSError:\n", + " logging.info(\n", + " f\"Tokenizer not found locally. Downloading tokenizer for {base_model_id} to cache...\"\n", + " )\n", + " if \"llama\" in base_model_id.lower():\n", + " tokenizer = LlamaTokenizer.from_pretrained(base_model_id)\n", + " else:\n", + " tokenizer = AutoTokenizer.from_pretrained(base_model_id)\n", + " tokenizer.pad_token_id = 0\n", + " tokenizer.padding_side = \"left\"\n", + " return model, tokenizer\n", + "\n", + "class TextToSQLGenerator:\n", + " \"\"\"Handles SQL query generation for a given text prompt.\"\"\"\n", + "\n", + " def __init__(\n", + " self, base_model_id=BASE_MODEL, use_adapter=False, lora_checkpoint=None, loaded_base_model=None\n", + " ):\n", + " \"\"\"\n", + " Initialize the InferenceModel class.\n", + " Parameters:\n", + " use_adapter (bool, optional): Whether to use LoRA model. 
Defaults to False.\n",
+ "        \"\"\"\n",
+ "        try:\n",
+ "            if loaded_base_model:\n",
+ "                self.model = loaded_base_model.model\n",
+ "                self.tokenizer = loaded_base_model.tokenizer\n",
+ "            else:\n",
+ "                self.model, self.tokenizer = setup_model_and_tokenizer(base_model_id)\n",
+ "            if use_adapter:\n",
+ "                self.model = PeftModel.from_pretrained(self.model, lora_checkpoint)\n",
+ "        except Exception as e:\n",
+ "            logging.error(f\"Exception occurred during model initialization: {e}\")\n",
+ "            raise\n",
+ "\n",
+ "        self.model.to(INFERENCE_DEVICE)\n",
+ "        self.max_length = 512\n",
+ "\n",
+ "\n",
+ "    def generate(self, prompt, **kwargs):\n",
+ "        \"\"\"Generates an SQL query based on the given prompt.\n",
+ "        Parameters:\n",
+ "            prompt (str): The SQL prompt.\n",
+ "        Returns:\n",
+ "            str: The generated SQL query.\n",
+ "        \"\"\"\n",
+ "        try:\n",
+ "            encoded_prompt = self.tokenizer(\n",
+ "                prompt,\n",
+ "                truncation=True,\n",
+ "                max_length=self.max_length,\n",
+ "                padding=False,\n",
+ "                return_tensors=\"pt\",\n",
+ "            ).input_ids.to(INFERENCE_DEVICE)\n",
+ "            with torch.no_grad():\n",
+ "                # use the autocast that matches the selected inference device\n",
+ "                autocast = (\n",
+ "                    torch.xpu.amp.autocast\n",
+ "                    if INFERENCE_DEVICE.type.startswith(\"xpu\")\n",
+ "                    else torch.cpu.amp.autocast\n",
+ "                )\n",
+ "                with autocast():\n",
+ "                    outputs = self.model.generate(\n",
+ "                        input_ids=encoded_prompt,\n",
+ "                        do_sample=True,\n",
+ "                        max_length=self.max_length,\n",
+ "                        temperature=0.3,\n",
+ "                        repetition_penalty=1.2,\n",
+ "                    )\n",
+ "            generated = self.tokenizer.decode(outputs[0], skip_special_tokens=True)\n",
+ "            return generated\n",
+ "        except Exception as e:\n",
+ "            logging.error(f\"Exception occurred during query generation: {e}\")\n",
+ "            raise"
+ ]
+ },
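+ {
+ "cell_type": "markdown",
+ "id": "generator-usage-sketch",
+ "metadata": {},
+ "source": [
+ "Before the guided comparison below, here is a minimal usage sketch (our own example; it assumes the cells above have been run and reuses one of the sample questions):\n",
+ "\n",
+ "```python\n",
+ "gen = TextToSQLGenerator(use_adapter=False)  # base model, no LoRA adapter\n",
+ "prompt = generate_prompt_sql(\n",
+ "    \"How many goals did player John Smith score last season?\",\n",
+ "    \"CREATE TABLE player_stats (player_name VARCHAR, goals_scored INT, season VARCHAR)\",\n",
+ ")\n",
+ "print(gen.generate(prompt))\n",
+ "```\n"
+ ]
+ },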
+ {
+ "cell_type": "markdown",
+ "id": "dfb1c247-f6e2-4ccd-bdc6-26f41ea63c55",
+ "metadata": {},
+ "source": [
+ "---\n",
+ "**Generate SQL from Natural Language!** 🚀 \n",
+ "\n",
+ "**With `TextToSQLGenerator`:**\n",
+ "- Compare base model 🆚 LoRA model.\n",
+ "- Instantiate with different `use_adapter` settings for side-by-side comparison.\n",
+ "\n",
+ "**Things to try out:**\n",
+ "\n",
+ "1. **Select a Natural Language Question** 🗣️: Use a prompt or sample data (see the samples dict below) for SQL translation.\n",
+ "2. **Base Model SQL Generation** 🏗️: Generate SQL from the prompt using the base model.\n",
+ "3. **Fine-Tuned Model SQL Generation** ✨: Generate SQL with the fine-tuned model; note improvements.\n",
+ "4. **Compare Outputs** 🔍: Evaluate both SQL queries for accuracy to compare both models.\n",
+ "5. **Iterate and Refine** 🔁: Adjust training parameters or the dataset and finetune again if required.\n",
+ "6. **Integrate with 🗂️ LlamaIndex 🦙**: Use frameworks like [LlamaIndex](https://github.com/run-llama/llama_index) to integrate your fine-tuned model for querying a database using natural language.\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "94c63bc1-9e86-4760-ac9b-c915969ee5fb",
+ "metadata": {},
+ "source": [
+ "**Now let's see how our model performs. Let's generate some SQL queries:**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "dd2c6f25-5c45-4061-9b62-91233149b7a2",
+ "metadata": {
+ "jupyter": {
+ "source_hidden": true
+ }
+ },
+ "outputs": [],
+ "source": [
+ "# let's load the base model for a baseline comparison\n",
+ "base_model = TextToSQLGenerator(\n",
+ "    use_adapter=False,\n",
+ "    lora_checkpoint=\"\",\n",
+ ")  # setting use_adapter=False to use the base model\n",
+ "finetuned_model = None  # comment out finetuned_model if you don't want to cache the finetuned model"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "b8417146-0c28-4996-81a4-c4b0857d81e7",
+ "metadata": {
+ "jupyter": {
+ "source_hidden": true
+ }
+ },
+ "outputs": [],
+ "source": [
+ "import json\n",
+ "\n",
+ "from IPython.display import display, HTML\n",
+ "\n",
+ "INFERENCE_DEVICE = torch.device(\"cpu\") \n",
+ "\n",
+ "\n",
+ "# let's use some fake sample data\n",
+ "samples = \"\"\"\n",
+ "[\n",
+ "    {\n",
+ "        \"question\": \"What is the capacity of the stadium where the team 'Mountain Eagles' plays?\",\n",
+ "        \"context\": \"CREATE TABLE stadium_info (team_name VARCHAR, stadium_name VARCHAR, capacity INT)\"\n",
+ "    },\n",
+ "    {\n",
+ "        \"question\": \"How many goals did player John Smith score last season?\",\n",
+ "        \"context\": \"CREATE TABLE player_stats (player_name VARCHAR, goals_scored INT, season VARCHAR)\"\n",
+ "    },\n",
+ "    {\n",
+ "        \"question\": \"What are the operating hours for the Central Library on weekends?\",\n",
+ "        \"context\": \"CREATE TABLE library_hours (library_name VARCHAR, day_of_week VARCHAR, open_time TIME, close_time TIME)\"\n",
+ "    }\n",
+ "]\n",
+ "\"\"\"\n",
+ "\n",
+ "def _extract_sections(output):\n",
+ "    input_section = output.split(\"### Input:\")[1].split(\"### Context:\")[0]\n",
+ "    context_section = output.split(\"### Context:\")[1].split(\"### Response:\")[0]\n",
+ "    response_section = output.split(\"### Response:\")[1]\n",
+ "    return input_section, context_section, response_section\n",
+ "\n",
+ "def run_inference(sample_data, model, finetuned=False):\n",
+ "    if INFERENCE_DEVICE.type.startswith(\"xpu\"):\n",
+ "        torch.xpu.empty_cache()\n",
+ "    \n",
+ "    color = \"#4CAF52\" if finetuned else \"#2196F4\"\n",
+ "    model_type = \"finetuned\" if finetuned else \"base\"\n",
+ "    display(HTML(f\"
<p style='color:{color};'>Processing queries on {INFERENCE_DEVICE} please wait...</p>\"))\n",
+ "    \n",
+ "    for index, row in enumerate(sample_data):\n",
+ "        try:\n",
+ "            prompt = generate_prompt_sql(row[\"question\"], context=row[\"context\"])\n",
+ "            output = model.generate(prompt) \n",
+ "            input_section, context_section, response_section = _extract_sections(output)\n",
+ "            \n",
+ "            tabbed_output = f\"\"\"\n",
+ "<details>\n",
+ "    <summary>{model_type} model - Sample {index+1} (Click to expand)</summary>\n",
+ "    <div>\n",
+ "        <p><b>Expected input 📝:</b><br>{input_section}</p>\n",
+ "        <p><b>Expected context 📚:</b><br>{context_section}</p>\n",
+ "        <p><b>Generated response 💡:</b><br>{response_section}</p>\n",
+ "    </div>\n",
+ "</details>\n",
+ "<hr>\n",
+ "
\"\"\" # Subtle separator\n", + " display(HTML(tabbed_output))\n", + " except Exception as e:\n", + " logging.error(f\"Exception occurred during sample processing: {e}\")\n", + "\n", + "\n", + "\n", + "\n", + "# checkpoints are saved to `./lora_adapters`.\n", + "# Update the USING_CHECKPOINT to the one you want to use.\n", + "USING_CHECKPOINT=200\n", + "# if the kernel is interrupted the latest adapter (LORA_CHECKPOINT) is `./final_model_interrupted/`\n", + "# or else, the final model LORA_CHECKPOINT is `./final_model`\n", + "LORA_CHECKPOINT = f\"./lora_adapters/checkpoint-{USING_CHECKPOINT}/\"\n", + "\n", + "if os.path.exists(LORA_CHECKPOINT):\n", + " sample_data = json.loads(samples)\n", + " run_inference(sample_data, model=base_model)\n", + " if not finetuned_model:\n", + " finetuned_model = TextToSQLGenerator(\n", + " use_adapter=True,\n", + " lora_checkpoint=LORA_CHECKPOINT,\n", + " loaded_base_model=base_model\n", + " )\n", + " run_inference(sample_data, model=finetuned_model, finetuned=True)\n", + "\n", + " # To conserve memory we can delete the model\n", + " #del finetuned_model\n", + " #del base_model" + ] + }, + { + "cell_type": "markdown", + "id": "11c2506c", + "metadata": {}, + "source": [ + "---\n", + "**Conclusion** 👏\n", + "\n", + "We've successfully navigated the process of selecting and fine-tuning a foundational LLM model on Intel GPUs, showcasing its SQL generation capabilities. I hope that I have been able to highlight the potential of customizing language models for specific tasks and on how to efficiently finetune LLMs on Intel XPUs. As a suggestion for your continued journey, consider experimenting with different models, adjusting inference settings, and exploring various LoRA configurations to refine your results. Keep exploring!\n", + "\n", + "---\n" + ] + }, + { + "cell_type": "markdown", + "id": "c458cac4", + "metadata": { + "jp-MarkdownHeadingCollapsed": true, + "jupyter": { + "source_hidden": true + } + }, + "source": [ + "\n", + "**Disclaimer for Using Large Language Models**\n", + "\n", + "Please be aware that while Large Language Models are powerful tools for text generation, they may sometimes produce results that are unexpected, biased, or inconsistent with the given prompt. It's advisable to carefully review the generated text and consider the context and application in which you are using these models.\n", + "\n", + "For detailed information on each model's capabilities, licensing, and attribution, please refer to the respective model cards:\n", + "\n", + "1. **Open LLaMA 3B v2**\n", + " - Model Card: [openlm-research/open_llama_3b_v2](https://huggingface.co/openlm-research/open_llama_3b_v2)\n", + "\n", + "2. **Open LLaMA 13B**\n", + " - Model Card: [openlm-research/open_llama_13b](https://huggingface.co/openlm-research/open_llama_13b)\n", + "\n", + "3. **Nous-Hermes LLaMA 2-7B**\n", + " - Model Card: [NousResearch/Nous-Hermes-llama-2-7b](https://huggingface.co/NousResearch/Nous-Hermes-llama-2-7b)\n", + "\n", + "4. **LLaMA 2-7B Chat HF**\n", + " - Model Card: [NousResearch/Llama-2-7b-chat-hf](https://huggingface.co/NousResearch/Llama-2-7b-chat-hf)\n", + "\n", + "5. **LLaMA 2-13B HF**\n", + " - Model Card: [NousResearch/Llama-2-13b-hf](https://huggingface.co/NousResearch/Llama-2-13b-hf)\n", + "\n", + "6. **CodeLlama 7B HF**\n", + " - Model Card: [NousResearch/CodeLlama-7b-hf](https://huggingface.co/NousResearch/CodeLlama-7b-hf)\n", + "\n", + "7. 
**Phind-CodeLlama 34B v2**\n", + " - Model Card: [Phind/Phind-CodeLlama-34B-v2](https://huggingface.co/Phind/Phind-CodeLlama-34B-v2)\n", + "\n", + "8. **Zephyr-7b-beta**\n", + " - Model Card: [HuggingFaceH4/zephyr-7b-beta](https://huggingface.co/HuggingFaceH4/zephyr-7b-beta)\n", + "\n", + "\n", + "Usage of these models must also adhere to the licensing agreements and be in accordance with ethical guidelines and best practices for AI. If you have any concerns or encounter issues with the models, please refer to the respective model cards and documentation provided in the links above.\n", + "To the extent that any public or non-Intel datasets or models are referenced by or accessed using these materials those datasets or models are provided by the third party indicated as the content source. Intel does not create the content and does not warrant its accuracy or quality. By accessing the public content, or using materials trained on or with such content, you agree to the terms associated with that content and that your use complies with the applicable license.\n", + "\n", + " \n", + "Intel expressly disclaims the accuracy, adequacy, or completeness of any such public content, and is not liable for any errors, omissions, or defects in the content, or for any reliance on the content. Intel is not liable for any liability or damages relating to your use of public content.\n", + "\n", + "Intel’s provision of these resources does not expand or otherwise alter Intel’s applicable published warranties or warranty disclaimers for Intel products or solutions, and no additional obligations, indemnifications, or liabilities arise from Intel providing such resources. Intel reserves the right, without notice, to make corrections, enhancements, improvements, and other changes to its materials.\n", + "\n", + "---\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "pytorch-gpu", + "language": "python", + "name": "pytorch-gpu" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.16" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/architects_of_future/IDC notebook/simple_llm_inference.ipynb b/architects_of_future/IDC notebook/simple_llm_inference.ipynb new file mode 100644 index 00000000..4fe742f5 --- /dev/null +++ b/architects_of_future/IDC notebook/simple_llm_inference.ipynb @@ -0,0 +1,812 @@ +{ + "cells": [ + { + "cell_type": "raw", + "id": "8b40b326-4549-4b2b-8ce1-a453fbaa7a19", + "metadata": {}, + "source": [ + "SPDX-License-Identifier: Apache-2.0\n", + "Copyright (c) 2023, Rahul Unnikrishnan Nair " + ] + }, + { + "cell_type": "markdown", + "id": "1d173d6a-a86d-441a-b36a-efed57814310", + "metadata": {}, + "source": [ + "---\n", + "**Simple LLM Inference: Playing with Language Models on Intel® Data Center Max Series GPUs**" + ] + }, + { + "cell_type": "markdown", + "id": "c86f2e84-54e8-4c1d-bfec-302f9dff577d", + "metadata": {}, + "source": [ + "Hello and welcome! Are you curious about how computers understand and generate human-like text? Do you want to play around with text generation without getting too technical? Then you've come to the right place.\n", + "\n", + "Large Language Models (LLMs) have a wide range of applications, but they can also be fun to experiment with. 
Here, we'll use some simple pre-trained models to explore text generation interactively.\n",
+ "\n",
+ "Powered by Intel® Data Center GPU Max 1100s, this notebook provides a hands-on experience that doesn't require deep technical knowledge. Whether you're a student, writer, educator, or just curious about AI, this guide is designed for you.\n",
+ "\n",
+ "Ready to try it out? Let's set up our environment and start exploring the world of text generation with LLMs!\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "e47145a5-c2b2-4957-8ce0-51c0fb1bf9a5",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Required packages, install if not installed (assume PyTorch* and Intel® Extension for PyTorch* are already present)\n",
+ "#import sys\n",
+ "#!conda install -y --quiet --prefix {sys.prefix} -c conda-forge \\\n",
+ "#    accelerate==0.23.0 \\\n",
+ "#    validators==0.22.0 \\\n",
+ "#    transformers==4.32.1 \\\n",
+ "#    sentencepiece \\\n",
+ "#    pillow \\\n",
+ "#    ipywidgets \\\n",
+ "#    ipython > /dev/null && echo \"Installation successful\" || echo \"Installation failed\"\n",
+ "\n",
+ "import sys\n",
+ "import site\n",
+ "from pathlib import Path\n",
+ "\n",
+ "!echo \"Installation in progress...\"\n",
+ "!{sys.executable} -m pip install -U transformers==4.35.2 --no-warn-script-location > /dev/null && echo \"Installation successful\" || echo \"Installation failed\"\n",
+ "\n",
+ "def get_python_version():\n",
+ "    return \"python\" + \".\".join(map(str, sys.version_info[:2]))\n",
+ "\n",
+ "def set_local_bin_path():\n",
+ "    local_bin = str(Path.home() / \".local\" / \"bin\") \n",
+ "    local_site_packages = str(\n",
+ "        Path.home() / \".local\" / \"lib\" / get_python_version() / \"site-packages\"\n",
+ "    )\n",
+ "    sys.path.append(local_bin)\n",
+ "    sys.path.insert(0, site.getusersitepackages())\n",
+ "    sys.path.insert(0, sys.path.pop(sys.path.index(local_site_packages)))\n",
+ "\n",
+ "set_local_bin_path()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "1abc93ee-117e-4d10-a97c-c26429b38159",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import logging\n",
+ "import os\n",
+ "import random\n",
+ "import re\n",
+ "\n",
+ "os.environ[\"SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS\"] = \"1\"\n",
+ "os.environ[\"ENABLE_SDP_FUSION\"] = \"1\"\n",
+ "import warnings\n",
+ "\n",
+ "# Suppress warnings for a cleaner output\n",
+ "warnings.filterwarnings(\"ignore\")\n",
+ "\n",
+ "import torch\n",
+ "import intel_extension_for_pytorch as ipex\n",
+ "\n",
+ "from transformers import AutoModelForCausalLM, AutoTokenizer\n",
+ "from transformers import LlamaTokenizer, LlamaForCausalLM\n",
+ "from transformers import BertTokenizer, BertForSequenceClassification\n",
+ "\n",
+ "from ipywidgets import VBox, HBox, Button, Dropdown, IntSlider, FloatSlider, Text, Output, Label, Layout\n",
+ "import ipywidgets as widgets\n",
+ "from ipywidgets import HTML\n",
+ "\n",
+ "\n",
+ "# random seed\n",
+ "if torch.xpu.is_available():\n",
+ "    seed = 88\n",
+ "    random.seed(seed)\n",
+ "    torch.xpu.manual_seed(seed)\n",
+ "    torch.xpu.manual_seed_all(seed)\n",
+ "\n",
+ "def select_device(preferred_device=None):\n",
+ "    \"\"\"\n",
+ "    Selects the best available XPU device or the preferred device if specified.\n",
+ "\n",
+ "    Args:\n",
+ "        preferred_device (str, optional): Preferred device string (e.g., \"cpu\", \"xpu\", \"xpu:0\", \"xpu:1\", etc.). 
If None, a random available XPU device will be selected or CPU if no XPU devices are available.\n",
+ "\n",
+ "    Returns:\n",
+ "        torch.device: The selected device object.\n",
+ "    \"\"\"\n",
+ "    try:\n",
+ "        if preferred_device and preferred_device.startswith(\"cpu\"):\n",
+ "            print(\"Using CPU.\")\n",
+ "            return torch.device(\"cpu\")\n",
+ "        if preferred_device and preferred_device.startswith(\"xpu\"):\n",
+ "            if preferred_device == \"xpu\" or (\n",
+ "                \":\" in preferred_device\n",
+ "                and int(preferred_device.split(\":\")[1]) >= torch.xpu.device_count()\n",
+ "            ):\n",
+ "                preferred_device = (\n",
+ "                    None  # Handle as if no preferred device was specified\n",
+ "                )\n",
+ "            else:\n",
+ "                device = torch.device(preferred_device)\n",
+ "                if device.type == \"xpu\" and device.index < torch.xpu.device_count():\n",
+ "                    vram_used = torch.xpu.memory_allocated(device) / (\n",
+ "                        1024**2\n",
+ "                    )  # In MB\n",
+ "                    print(\n",
+ "                        f\"Using preferred device: {device}, VRAM used: {vram_used:.2f} MB\"\n",
+ "                    )\n",
+ "                    return device\n",
+ "\n",
+ "        if torch.xpu.is_available():\n",
+ "            device_id = random.choice(\n",
+ "                range(torch.xpu.device_count())\n",
+ "            )  # Select a random available XPU device\n",
+ "            device = torch.device(f\"xpu:{device_id}\")\n",
+ "            vram_used = torch.xpu.memory_allocated(device) / (1024**2)  # In MB\n",
+ "            print(f\"Selected device: {device}, VRAM used: {vram_used:.2f} MB\")\n",
+ "            return device\n",
+ "    except Exception as e:\n",
+ "        print(f\"An error occurred while selecting the device: {e}\")\n",
+ "    print(\"No XPU devices available or preferred device not found. Using CPU.\")\n",
+ "    return torch.device(\"cpu\")\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "416381a3-6293-4701-a8fa-76f1402037dc",
+ "metadata": {},
+ "source": [
+ "---\n",
+ "**A Glimpse Into Text Generation with Language Models**\n",
+ "\n",
+ "If you're intrigued by how machines can generate human-like text, let's take a closer look at the underlying code. Even if you're not technically inclined, this section will provide a high-level understanding of how it all works:\n",
+ "\n",
+ "- **Class Definition**: The `ChatBotModel` class is the core of our text generation. It handles the setup, optimization, and interaction with the LLM (Large Language Model).\n",
+ "\n",
+ "- **Initialization**: When you create an instance of this class, you can specify the model's path, the device to run on (defaulting to Intel's \"xpu\" device if available), and the data type. There's also an option to optimize the model for Intel GPUs using Intel® Extension for PyTorch* (IPEX).\n",
+ "\n",
+ "- **Input Preparation**: The `prepare_input` method ensures that the input doesn't exceed the maximum length and combines the previous text with the user input, if required.\n",
+ "\n",
+ "- **Output Generation**: The `gen_output` method takes the prepared input and several parameters controlling the generation process, like temperature, top_p, top_k, etc., and produces the text response.\n",
+ "\n",
+ "- **Warm-up**: Before the main interactions, the `warmup_model` method helps in \"warming up\" the model to make subsequent runs faster.\n",
+ "\n",
+ "- **Text Processing**: Several methods like `unique_sentences`, `remove_repetitions`, and `extract_bot_response` handle the text processing to ensure the generated text is readable and free from repetitions or unnecessary echoes.\n",
+ "\n",
+ "Feel free to explore the code and play around with different parameters. Remember, this is a simple and interactive way to experiment with text generation. It's not a cutting-edge chatbot, but rather a playful tool to engage with language models. Enjoy the journey into the world of LLMs, using Intel® Data Center GPU Max 1100s!\n"
+ ]
+ },
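+ {
+ "cell_type": "markdown",
+ "id": "chatbot-usage-sketch",
+ "metadata": {},
+ "source": [
+ "To make that flow concrete, here is a minimal usage sketch (our own illustration; it assumes the `ChatBotModel` cell below has been run and uses the same generation defaults as the `interact` method):\n",
+ "\n",
+ "```python\n",
+ "bot = ChatBotModel()  # defaults to openlm-research/open_llama_3b_v2\n",
+ "input_ids = bot.prepare_input(\"\", \"Tell me a fun fact about GPUs.\")\n",
+ "output = bot.gen_output(input_ids, temperature=0.1, top_p=0.95, top_k=40,\n",
+ "                        num_beams=3, repetition_penalty=1.8)\n",
+ "text = bot.tokenizer.decode(output[0], skip_special_tokens=True)\n",
+ "print(bot.extract_bot_response(bot.strip_response(text)))\n",
+ "```\n"
+ ]
+ },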
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "fa2f29e9-cd41-4605-aafc-f5aaaf440469",
+ "metadata": {
+ "editable": true,
+ "slideshow": {
+ "slide_type": ""
+ },
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "MODEL_CACHE_PATH = \"/home/common/data/Big_Data/GenAI/llm_models\"\n",
+ "class ChatBotModel:\n",
+ "    \"\"\"\n",
+ "    ChatBotModel is a class for generating responses based on text prompts using a pretrained model.\n",
+ "\n",
+ "    Attributes:\n",
+ "    - device: The device to run the model on. Default is \"xpu\" if available, otherwise \"cpu\".\n",
+ "    - model: The loaded model for text generation.\n",
+ "    - tokenizer: The loaded tokenizer for the model.\n",
+ "    - torch_dtype: The data type to use in the model.\n",
+ "    \"\"\"\n",
+ "\n",
+ "    def __init__(\n",
+ "        self,\n",
+ "        model_id_or_path: str = \"openlm-research/open_llama_3b_v2\",  # \"Writer/camel-5b-hf\",\n",
+ "        torch_dtype: torch.dtype = torch.bfloat16,\n",
+ "        optimize: bool = True,\n",
+ "    ) -> None:\n",
+ "        \"\"\"\n",
+ "        The initializer for ChatBotModel class.\n",
+ "\n",
+ "        Parameters:\n",
+ "        - model_id_or_path: The identifier or path of the pretrained model.\n",
+ "        - torch_dtype: The data type to use in the model. Default is torch.bfloat16.\n",
+ "        - optimize: If True, ipex is used to optimize the model\n",
+ "        \"\"\"\n",
+ "        self.torch_dtype = torch_dtype\n",
+ "        self.device = select_device(\"xpu\")\n",
+ "        self.model_id_or_path = model_id_or_path\n",
+ "        local_model_id = self.model_id_or_path.replace(\"/\", \"--\")\n",
+ "        local_model_path = os.path.join(MODEL_CACHE_PATH, local_model_id)\n",
+ "\n",
+ "        if (\n",
+ "            self.device.startswith(\"xpu\")\n",
+ "            if isinstance(self.device, str)\n",
+ "            else self.device.type == \"xpu\"\n",
+ "        ):\n",
+ "\n",
+ "            self.autocast = torch.xpu.amp.autocast\n",
+ "        else:\n",
+ "            self.autocast = torch.cpu.amp.autocast\n",
+ "        self.torch_dtype = torch_dtype\n",
+ "        try:\n",
+ "            if \"llama\" in model_id_or_path:\n",
+ "                self.tokenizer = LlamaTokenizer.from_pretrained(local_model_path)\n",
+ "                self.model = (\n",
+ "                    LlamaForCausalLM.from_pretrained(\n",
+ "                        local_model_path,\n",
+ "                        low_cpu_mem_usage=True,\n",
+ "                        torch_dtype=self.torch_dtype,\n",
+ "                    )\n",
+ "                    .to(self.device)\n",
+ "                    .eval()\n",
+ "                )\n",
+ "            else:\n",
+ "                self.tokenizer = AutoTokenizer.from_pretrained(\n",
+ "                    local_model_path, trust_remote_code=True\n",
+ "                )\n",
+ "                self.model = (\n",
+ "                    AutoModelForCausalLM.from_pretrained(\n",
+ "                        local_model_path,\n",
+ "                        low_cpu_mem_usage=True,\n",
+ "                        trust_remote_code=True,\n",
+ "                        torch_dtype=self.torch_dtype,\n",
+ "                    )\n",
+ "                    .to(self.device)\n",
+ "                    .eval()\n",
+ "                )\n",
+ "        except (OSError, ValueError, EnvironmentError) as e:\n",
+ "            logging.info(\n",
+ "                f\"Tokenizer / model not found locally. 
Downloading tokenizer / model for {self.model_id_or_path} to cache...: {e}\"\n", + " )\n", + " if \"llama\" in model_id_or_path:\n", + " self.tokenizer = LlamaTokenizer.from_pretrained(self.model_id_or_path)\n", + " self.model = (\n", + " LlamaForCausalLM.from_pretrained(\n", + " self.model_id_or_path,\n", + " low_cpu_mem_usage=True,\n", + " torch_dtype=self.torch_dtype,\n", + " )\n", + " .to(self.device)\n", + " .eval()\n", + " )\n", + " else:\n", + " self.tokenizer = AutoTokenizer.from_pretrained(\n", + " self.model_id_or_path, trust_remote_code=True\n", + " )\n", + " self.model = (\n", + " AutoModelForCausalLM.from_pretrained(\n", + " self.model_id_or_path,\n", + " low_cpu_mem_usage=True,\n", + " trust_remote_code=True,\n", + " torch_dtype=self.torch_dtype,\n", + " )\n", + " .to(self.device)\n", + " .eval()\n", + " )\n", + " \n", + " self.max_length = 256\n", + "\n", + " if optimize:\n", + " if hasattr(ipex, \"optimize_transformers\"):\n", + " try:\n", + " ipex.optimize_transformers(self.model, dtype=self.torch_dtype)\n", + " except:\n", + " ipex.optimize(self.model, dtype=self.torch_dtype)\n", + " else:\n", + " ipex.optimize(self.model, dtype=self.torch_dtype)\n", + "\n", + " def prepare_input(self, previous_text, user_input):\n", + " \"\"\"Prepare the input for the model, ensuring it doesn't exceed the maximum length.\"\"\"\n", + " response_buffer = 100\n", + " user_input = (\n", + " \"Below is an instruction that describes a task. \"\n", + " \"Write a response that appropriately completes the request.\\n\\n\"\n", + " f\"### Instruction:\\n{user_input}\\n\\n### Response:\")\n", + " combined_text = previous_text + \"\\nUser: \" + user_input + \"\\nBot: \"\n", + " input_ids = self.tokenizer.encode(\n", + " combined_text, return_tensors=\"pt\", truncation=False\n", + " )\n", + " adjusted_max_length = self.max_length - response_buffer\n", + " if input_ids.shape[1] > adjusted_max_length:\n", + " input_ids = input_ids[:, -adjusted_max_length:]\n", + " return input_ids.to(device=self.device)\n", + "\n", + " def gen_output(\n", + " self, input_ids, temperature, top_p, top_k, num_beams, repetition_penalty\n", + " ):\n", + " \"\"\"\n", + " Generate the output text based on the given input IDs and generation parameters.\n", + "\n", + " Args:\n", + " input_ids (torch.Tensor): The input tensor containing token IDs.\n", + " temperature (float): The temperature for controlling randomness in Boltzmann distribution.\n", + " Higher values increase randomness, lower values make the generation more deterministic.\n", + " top_p (float): The cumulative distribution function (CDF) threshold for Nucleus Sampling.\n", + " Helps in controlling the trade-off between randomness and diversity.\n", + " top_k (int): The number of highest probability vocabulary tokens to keep for top-k-filtering.\n", + " num_beams (int): The number of beams for beam search. 
Controls the breadth of the search.\n", + "            repetition_penalty (float): The penalty applied for repeating tokens.\n", + "\n", + "        Returns:\n", + "            torch.Tensor: The generated output tensor.\n", + "        \"\"\"\n", + "        print(f\"Using max length: {self.max_length}\")\n", + "        with self.autocast(\n", + "            enabled=self.torch_dtype != torch.float32,\n", + "            dtype=self.torch_dtype,\n", + "        ):\n", + "            with torch.no_grad():\n", + "                output = self.model.generate(\n", + "                    input_ids,\n", + "                    pad_token_id=self.tokenizer.eos_token_id,\n", + "                    max_length=self.max_length,\n", + "                    temperature=temperature,\n", + "                    top_p=top_p,\n", + "                    top_k=top_k,\n", + "                    num_beams=num_beams,\n", + "                    repetition_penalty=repetition_penalty,\n", + "                )\n", + "        return output\n", + "\n", + "    def warmup_model(\n", + "        self, temperature, top_p, top_k, num_beams, repetition_penalty\n", + "    ) -> None:\n", + "        \"\"\"\n", + "        Warms up the model by generating a sample response.\n", + "        \"\"\"\n", + "        sample_prompt = \"\"\"A dialog, where User interacts with a helpful Bot.\n", + "        AI is helpful, kind, obedient, honest, and knows its own limits.\n", + "        User: Hello, Bot.\n", + "        Bot: Hello! How can I assist you today?\n", + "        \"\"\"\n", + "        input_ids = self.tokenizer(sample_prompt, return_tensors=\"pt\").input_ids.to(\n", + "            device=self.device\n", + "        )\n", + "        _ = self.gen_output(\n", + "            input_ids,\n", + "            temperature=temperature,\n", + "            top_p=top_p,\n", + "            top_k=top_k,\n", + "            num_beams=num_beams,\n", + "            repetition_penalty=repetition_penalty,\n", + "        )\n", + "\n", + "    def strip_response(self, generated_text):\n", + "        \"\"\"Remove \"### Response:\" from the string if it exists.\"\"\"\n", + "        match = re.search(r'### Response:(.*)', generated_text, re.S)\n", + "        if match:\n", + "            return match.group(1).strip()\n", + "        else:\n", + "            return generated_text\n", + "\n", + "    def unique_sentences(self, text: str) -> str:\n", + "        \"\"\"Deduplicate sentences while preserving their original order.\"\"\"\n", + "        sentences = text.split(\". \")\n", + "        if sentences[-1] and sentences[-1][-1] != \".\":\n", + "            sentences = sentences[:-1]\n", + "        # dict.fromkeys keeps the first occurrence of each sentence in order\n", + "        # (a plain set would scramble the sentence order).\n", + "        sentences = list(dict.fromkeys(sentences))\n", + "        return \". \".join(sentences) + \".\" if sentences else \"\"\n", + "\n", + "    def remove_repetitions(self, text: str, user_input: str) -> str:\n", + "        \"\"\"\n", + "        Remove repetitive sentences or phrases from the generated text and avoid echoing user's input.\n", + "\n", + "        Args:\n", + "            text (str): The input text with potential repetitions.\n", + "            user_input (str): The user's original input to check against echoing.\n", + "\n", + "        Returns:\n", + "            str: The processed text with repetitions and echoes removed.\n", + "        \"\"\"\n", + "        text = re.sub(re.escape(user_input), \"\", text, count=1).strip()\n", + "        text = self.unique_sentences(text)\n", + "        return text\n", + "\n", + "    def extract_bot_response(self, generated_text: str) -> str:\n", + "        \"\"\"\n", + "        Extract the first response starting with \"Bot:\" from the generated text.\n", + "\n", + "        Args:\n", + "            generated_text (str): The full generated text from the model.\n", + "\n", + "        Returns:\n", + "            str: The extracted response starting with \"Bot:\".\n", + "        \"\"\"\n", + "        prefix = \"Bot:\"\n", + "        generated_text = generated_text.replace(\"\\n\", \". 
\")\n", + " bot_response_start = generated_text.find(prefix)\n", + " if bot_response_start != -1:\n", + " response_start = bot_response_start + len(prefix)\n", + " end_of_response = generated_text.find(\"\\n\", response_start)\n", + " if end_of_response != -1:\n", + " return generated_text[response_start:end_of_response].strip()\n", + " else:\n", + " return generated_text[response_start:].strip()\n", + " return re.sub(r'^[^a-zA-Z0-9]+', '', generated_text)\n", + "\n", + " def interact(\n", + " self,\n", + " out: Output, # Output widget to display the conversation\n", + " with_context: bool = True,\n", + " temperature: float = 0.10,\n", + " top_p: float = 0.95,\n", + " top_k: int = 40,\n", + " num_beams: int = 3,\n", + " repetition_penalty: float = 1.80,\n", + " ) -> None:\n", + " \"\"\"\n", + " Handle the chat loop where the user provides input and receives a model-generated response.\n", + "\n", + " Args:\n", + " with_context (bool): Whether to consider previous interactions in the session. Default is True.\n", + " temperature (float): The temperature for controlling randomness in Boltzmann distribution.\n", + " Higher values increase randomness, lower values make the generation more deterministic.\n", + " top_p (float): The cumulative distribution function (CDF) threshold for Nucleus Sampling.\n", + " Helps in controlling the trade-off between randomness and diversity.\n", + " top_k (int): The number of highest probability vocabulary tokens to keep for top-k-filtering.\n", + " num_beams (int): The number of beams for beam search. Controls the breadth of the search.\n", + " repetition_penalty (float): The penalty applied for repeating tokens.\n", + " \"\"\"\n", + " previous_text = \"\"\n", + " \n", + " def display_user_input_widgets():\n", + " default_color = \"\\033[0m\"\n", + " user_color, user_icon = \"\\033[94m\", \"😀 \"\n", + " bot_color, bot_icon = \"\\033[92m\", \"🤖 \"\n", + " user_input_widget = Text(placeholder=\"Type your message here...\", layout=Layout(width='80%'))\n", + " send_button = Button(description=\"Send\", button_style = \"primary\", layout=Layout(width='10%'))\n", + " chat_spin = HTML(value = \"\")\n", + " spin_style = \"\"\"\n", + "
\n", + " \n", + " \"\"\"\n", + " display(HBox([chat_spin, user_input_widget, send_button, ]))\n", + " \n", + " def on_send(button):\n", + " nonlocal previous_text\n", + " send_button.button_style = \"warning\"\n", + " chat_spin.value = spin_style\n", + " orig_input = \"\"\n", + " user_input = user_input_widget.value\n", + " with out:\n", + " print(f\" {user_color}{user_icon}You: {user_input}{default_color}\")\n", + " if user_input.lower() == \"exit\":\n", + " return\n", + " if \"camel\" in self.model_id_or_path:\n", + " orig_input = user_input\n", + " user_input = (\n", + " \"Below is an instruction that describes a task. \"\n", + " \"Write a response that appropriately completes the request.\\n\\n\"\n", + " f\"### Instruction:\\n{user_input}\\n\\n### Response:\")\n", + " if with_context:\n", + " self.max_length = 256\n", + " input_ids = self.prepare_input(previous_text, user_input)\n", + " else:\n", + " self.max_length = 96\n", + " input_ids = self.tokenizer.encode(user_input, return_tensors=\"pt\").to(self.device)\n", + " \n", + " output_ids = self.gen_output(\n", + " input_ids,\n", + " temperature=temperature,\n", + " top_p=top_p,\n", + " top_k=top_k,\n", + " num_beams=num_beams,\n", + " repetition_penalty=repetition_penalty,\n", + " )\n", + " generated_text = self.tokenizer.decode(output_ids[0], skip_special_tokens=True)\n", + " generated_text = self.strip_response(generated_text)\n", + " generated_text = self.extract_bot_response(generated_text)\n", + " generated_text = self.remove_repetitions(generated_text, user_input)\n", + " send_button.button_style = \"success\"\n", + " chat_spin.value = \"\"\n", + "\n", + " with out:\n", + " if orig_input:\n", + " user_input = orig_input\n", + " print(f\" {bot_color}{bot_icon}Bot: {generated_text}{default_color}\") \n", + " if with_context:\n", + " previous_text += \"\\nUser: \" + user_input + \"\\nBot: \" + generated_text\n", + " user_input_widget.value = \"\" \n", + " display_user_input_widgets()\n", + " send_button.on_click(on_send)\n", + " display_user_input_widgets()" + ] + }, + { + "cell_type": "markdown", + "id": "d3a61ad2-5155-4b07-9f10-d9d18f252e8f", + "metadata": {}, + "source": [ + "---\n", + "**Setting Up the Interactive Text Generation Interface**\n", + "\n", + "In the next section, we'll create an interactive text generation interface right here in this notebook. This will enable you to select a model, provide a prompt, and tweak various parameters without touching the code itself.\n", + "\n", + "- **Model Selection**: Choose from available pre-trained models or enter a custom model from the HuggingFace Hub.\n", + "- **Interaction Mode**: Decide whether to interact with or without context, allowing the model to remember previous interactions or treat each input independently.\n", + "- **Temperature**: Adjust this parameter to control the randomness in text generation. Higher values increase creativity; lower values make the generation more deterministic.\n", + "- **Top_p, Top_k**: Play with these parameters to influence the diversity and quality of the generated text.\n", + "- **Number of Beams**: Control the breadth of the search in text generation.\n", + "- **Repetition Penalty**: Modify this to prevent or allow repeated phrases and sentences.\n", + "\n", + "Once you've set your preferences, you can start the interaction and even reset or reload the model to try different settings. 
Let's set this up and explore the playful world of text generation using Intel® Data Center GPU Max 1100s!\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "42264502-ffdd-47e1-af1e-f7407c4b365f", + "metadata": {}, + "outputs": [], + "source": [ + "model_cache = {}\n", + "\n", + "from ipywidgets import HTML\n", + "def interact_with_llm():\n", + " models = [\"Writer/camel-5b-hf\", \n", + " \"openlm-research/open_llama_3b_v2\",\n", + " \"Intel/neural-chat-7b-v3\", \n", + " \"Intel/neural-chat-7b-v3-1\", # https://huggingface.co/Intel/neural-chat-7b-v3-1 - checkout the prompting template on the site to get better response.\n", + " \"HuggingFaceH4/zephyr-7b-beta\", \n", + " \"tiiuae/falcon-7b\"\n", + " ]\n", + " interaction_modes = [\"Interact with context\", \"Interact without context\"]\n", + " model_dropdown = Dropdown(options=models, value=models[0], description=\"Model:\")\n", + " interaction_mode = Dropdown(options=interaction_modes, value=interaction_modes[1], description=\"Interaction:\")\n", + " temperature_slider = FloatSlider(value=0.71, min=0, max=1, step=0.01, description=\"Temperature:\")\n", + " top_p_slider = FloatSlider(value=0.95, min=0, max=1, step=0.01, description=\"Top P:\")\n", + " top_k_slider = IntSlider(value=40, min=0, max=100, step=1, description=\"Top K:\")\n", + " num_beams_slider = IntSlider(value=3, min=1, max=10, step=1, description=\"Num Beams:\")\n", + " repetition_penalty_slider = FloatSlider(value=1.80, min=0, max=2, step=0.1, description=\"Rep Penalty:\")\n", + " \n", + " out = Output() \n", + " left_panel = VBox([model_dropdown, interaction_mode], layout=Layout(margin=\"0px 20px 10px 0px\"))\n", + " right_panel = VBox([temperature_slider, top_p_slider, top_k_slider, num_beams_slider, repetition_penalty_slider],\n", + " layout=Layout(margin=\"0px 0px 10px 20px\"))\n", + " user_input_widgets = HBox([left_panel, right_panel], layout=Layout(margin=\"0px 50px 10px 0px\"))\n", + " spinner = HTML(value=\"\")\n", + " start_button = Button(description=\"Start Interaction!\", button_style=\"primary\")\n", + " start_button_spinner = HBox([start_button, spinner])\n", + " start_button_spinner.layout.margin = '0 auto'\n", + " display(user_input_widgets)\n", + " display(start_button_spinner)\n", + " display(out)\n", + " \n", + " def on_start(button):\n", + " start_button.button_style = \"warning\"\n", + " start_button.description = \"Loading...\"\n", + " spinner.value = \"\"\"\n", + "
\n", + " \n", + " \"\"\"\n", + " out.clear_output()\n", + " with out:\n", + " print(\"\\nSetting up the model, please wait...\")\n", + " #out.clear_output()\n", + " model_choice = model_dropdown.value\n", + " with_context = interaction_mode.value == interaction_modes[0]\n", + " temperature = temperature_slider.value\n", + " top_p = top_p_slider.value\n", + " top_k = top_k_slider.value\n", + " num_beams = num_beams_slider.value\n", + " repetition_penalty = repetition_penalty_slider.value\n", + " model_key = (model_choice, \"xpu\")\n", + " if model_key not in model_cache:\n", + " model_cache[model_key] = ChatBotModel(model_id_or_path=model_choice)\n", + " bot = model_cache[model_key]\n", + " #if model_key not in model_cache:\n", + " # bot.warmup_model(\n", + " # temperature=temperature,\n", + " # top_p=top_p,\n", + " # top_k=top_k,\n", + " # num_beams=num_beams,\n", + " # repetition_penalty=repetition_penalty,\n", + " # )\n", + " \n", + " with out:\n", + " start_button.button_style = \"success\"\n", + " start_button.description = \"Refresh\"\n", + " spinner.value = \"\"\n", + " print(\"Ready!\")\n", + " print(\"\\nNote: This is a demonstration using pretrained models which were not fine-tuned for chat.\")\n", + " print(\"If the bot doesn't respond, try clicking on refresh.\\n\")\n", + " try:\n", + " with out:\n", + " bot.interact(\n", + " with_context=with_context,\n", + " out=out,\n", + " temperature=temperature,\n", + " top_p=top_p,\n", + " top_k=top_k,\n", + " num_beams=num_beams,\n", + " repetition_penalty=repetition_penalty,\n", + " )\n", + " except Exception as e:\n", + " with out:\n", + " print(f\"An error occurred: {e}\")\n", + "\n", + " start_button.on_click(on_start)\n" + ] + }, + { + "cell_type": "markdown", + "id": "5d7322e1-abae-4a2a-9381-453dc0cf0c23", + "metadata": {}, + "source": [ + "---\n", + "**Let's Dive In and Have Some Fun with LLM Models!**\n", + "\n", + "Ready for a playful interaction with some interesting LLM models? The interface below lets you choose from different models and settings. Just select your preferences, click the \"Start Interaction!\" button, and you're ready to chat.\n", + "\n", + "You can ask questions, make statements, or simply explore how the model responds to different inputs. It's a friendly way to get acquainted with AI and see what it has to say.\n", + "\n", + "Remember, this is all in good fun, and the models are here to engage with you. So go ahead, start a conversation, and enjoy the interaction!" 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b754474c-8a98-4d55-bb69-cd98a8c209e3", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "interact_with_llm()" + ] + }, + { + "cell_type": "markdown", + "id": "2d0d5473-e7fe-4de0-a8c0-f49ba924f35d", + "metadata": {}, + "source": [ + "## Language Models Disclaimer and Information\n", + "\n", + "### Camel-5B\n", + "- **Model card:** [Camel-5B](https://huggingface.co/Writer/camel-5b-hf)\n", + "- **License:** Apache 2.0\n", + "- **Reference:**\n", + "  ```bibtex\n", + "  @misc{Camel,\n", + "    author = {Writer Engineering team},\n", + "    title = {{Camel-5B InstructGPT}},\n", + "    howpublished = {\\url{https://dev.writer.com}},\n", + "    year = 2023,\n", + "    month = April\n", + "  }\n", + "  ```\n", + "\n", + "### OpenLLaMA 3b v2\n", + "- **Model card:** [OpenLLaMA 3b v2](https://huggingface.co/openlm-research/open_llama_3b_v2)\n", + "- **License:** Apache 2.0\n", + "- **References:**\n", + "  ```bibtex\n", + "  @software{openlm2023openllama,\n", + "    author = {Geng, Xinyang and Liu, Hao},\n", + "    title = {OpenLLaMA: An Open Reproduction of LLaMA},\n", + "    month = May,\n", + "    year = 2023,\n", + "    url = {https://github.com/openlm-research/open_llama}\n", + "  }\n", + "  @software{together2023redpajama,\n", + "    author = {Together Computer},\n", + "    title = {RedPajama-Data: An Open Source Recipe to Reproduce LLaMA training dataset},\n", + "    month = April,\n", + "    year = 2023,\n", + "    url = {https://github.com/togethercomputer/RedPajama-Data}\n", + "  }\n", + "  @article{touvron2023llama,\n", + "    title={Llama: Open and efficient foundation language models},\n", + "    author={Touvron, Hugo and Lavril, Thibaut and Izacard, Gautier and Martinet, Xavier and Lachaux, Marie-Anne and Lacroix, Timoth{\\'e}e and Rozi{\\`e}re, Baptiste and Goyal, Naman and Hambro, Eric and Azhar, Faisal and others},\n", + "    journal={arXiv preprint arXiv:2302.13971},\n", + "    year={2023}\n", + "  }\n", + "  ```\n", + "### Falcon 7B\n", + "\n", + "- **Model card:** [Falcon 7B](https://huggingface.co/tiiuae/falcon-7b)\n", + "- **License:** Apache 2.0\n", + "- **References:**\n", + "  ```bibtex\n", + "  @article{falcon40b,\n", + "    title = {{Falcon-40B}: an open large language model with state-of-the-art performance},\n", + "    author = {Almazrouei, Ebtesam and Alobeidli, Hamza and Alshamsi, Abdulaziz and Cappelli, Alessandro and Cojocaru, Ruxandra and Debbah, Merouane and Goffinet, Etienne and Heslow, Daniel and Launay, Julien and Malartic, Quentin and Noune, Badreddine and Pannier, Baptiste and Penedo, Guilherme},\n", + "    year={2023}\n", + "  }\n", + "  ```\n", + "### Zephyr 7B\n", + "\n", + "- **Model card:** [Zephyr 7B](https://huggingface.co/HuggingFaceH4/zephyr-7b-beta)\n", + "- **License:** MIT\n", + "- **References:**\n", + "  ```bibtex\n", + "  @misc{alignment_handbook2023,\n", + "    author = {Lewis Tunstall and Edward Beeching and Nathan Lambert and Nazneen Rajani and Alexander M. 
Rush and Thomas Wolf},\n", + " title = {The Alignment Handbook},\n", + " year = {2023},\n", + " publisher = {GitHub},\n", + " journal = {GitHub repository},\n", + " howpublished = {\\url{https://github.com/huggingface/alignment-handbook}}\n", + " }\n", + " ```\n", + "### Neural Chat 7b\n", + "- **Model card:** [Neural Chat](https://huggingface.co/Intel/neural-chat-7b-v3)\n", + "- **License:** Apache 2.0\n", + "\n", + "### Disclaimer for Using Large Language Models\n", + "\n", + "Please be aware that while Large Language Models like Camel-5B and OpenLLaMA 3b v2 are powerful tools for text generation, they may sometimes produce results that are unexpected, biased, or inconsistent with the given prompt. It's advisable to carefully review the generated text and consider the context and application in which you are using these models.\n", + "\n", + "Usage of these models must also adhere to the licensing agreements and be in accordance with ethical guidelines and best practices for AI. If you have any concerns or encounter issues with the models, please refer to the respective model cards and documentation provided in the links above.\n", + "\n", + "To the extent that any public or non-Intel datasets or models are referenced by or accessed using these materials those datasets or models are provided by the third party indicated as the content source. Intel does not create the content and does not warrant its accuracy or quality. By accessing the public content, or using materials trained on or with such content, you agree to the terms associated with that content and that your use complies with the applicable license.\n", + "\n", + " \n", + "Intel expressly disclaims the accuracy, adequacy, or completeness of any such public content, and is not liable for any errors, omissions, or defects in the content, or for any reliance on the content. Intel is not liable for any liability or damages relating to your use of public content.\n", + "\n", + "Intel’s provision of these resources does not expand or otherwise alter Intel’s applicable published warranties or warranty disclaimers for Intel products or solutions, and no additional obligations, indemnifications, or liabilities arise from Intel providing such resources. 
Intel reserves the right, without notice, to make corrections, enhancements, improvements, and other changes to its materials.\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "pytorch-gpu", + "language": "python", + "name": "pytorch-gpu" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.16" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/architects_of_future/Intel Inference model check/Inference_time_without_intel-extension.ipynb b/architects_of_future/Intel Inference model check/Inference_time_without_intel-extension.ipynb new file mode 100644 index 00000000..300d8270 --- /dev/null +++ b/architects_of_future/Intel Inference model check/Inference_time_without_intel-extension.ipynb @@ -0,0 +1,132 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "84068c6d-79a1-4fee-a9c0-215b1141ea5a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Defaulting to user installation because normal site-packages is not writeable\n", + "Requirement already satisfied: transformers in /home/u8c206d93f653f456d22b878722b9483/.local/lib/python3.9/site-packages (4.34.1)\n", + "Requirement already satisfied: filelock in /home/u8c206d93f653f456d22b878722b9483/.local/lib/python3.9/site-packages (from transformers) (3.13.1)\n", + "Requirement already satisfied: huggingface-hub<1.0,>=0.16.4 in /home/u8c206d93f653f456d22b878722b9483/.local/lib/python3.9/site-packages (from transformers) (0.17.3)\n", + "Requirement already satisfied: numpy>=1.17 in /opt/intel/oneapi/intelpython/python3.9/lib/python3.9/site-packages (from transformers) (1.24.3)\n", + "Requirement already satisfied: packaging>=20.0 in /opt/intel/oneapi/intelpython/python3.9/lib/python3.9/site-packages (from transformers) (23.1)\n", + "Requirement already satisfied: pyyaml>=5.1 in /opt/intel/oneapi/intelpython/python3.9/lib/python3.9/site-packages (from transformers) (6.0)\n", + "Requirement already satisfied: regex!=2019.12.17 in /home/u8c206d93f653f456d22b878722b9483/.local/lib/python3.9/site-packages (from transformers) (2023.10.3)\n", + "Requirement already satisfied: requests in /opt/intel/oneapi/intelpython/python3.9/lib/python3.9/site-packages (from transformers) (2.31.0)\n", + "Requirement already satisfied: tokenizers<0.15,>=0.14 in /home/u8c206d93f653f456d22b878722b9483/.local/lib/python3.9/site-packages (from transformers) (0.14.1)\n", + "Requirement already satisfied: safetensors>=0.3.1 in /home/u8c206d93f653f456d22b878722b9483/.local/lib/python3.9/site-packages (from transformers) (0.4.1)\n", + "Requirement already satisfied: tqdm>=4.27 in /home/u8c206d93f653f456d22b878722b9483/.local/lib/python3.9/site-packages (from transformers) (4.66.1)\n", + "Requirement already satisfied: fsspec in /home/u8c206d93f653f456d22b878722b9483/.local/lib/python3.9/site-packages (from huggingface-hub<1.0,>=0.16.4->transformers) (2023.9.2)\n", + "Requirement already satisfied: typing-extensions>=3.7.4.3 in /home/u8c206d93f653f456d22b878722b9483/.local/lib/python3.9/site-packages (from huggingface-hub<1.0,>=0.16.4->transformers) (4.8.0)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /opt/intel/oneapi/intelpython/python3.9/lib/python3.9/site-packages (from requests->transformers) (3.1.0)\n", + "Requirement already satisfied: idna<4,>=2.5 in 
/opt/intel/oneapi/intelpython/python3.9/lib/python3.9/site-packages (from requests->transformers) (3.4)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /home/u8c206d93f653f456d22b878722b9483/.local/lib/python3.9/site-packages (from requests->transformers) (1.26.15)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /opt/intel/oneapi/intelpython/python3.9/lib/python3.9/site-packages (from requests->transformers) (2023.7.22)\n" + ] + } + ], + "source": [ + "!pip install transformers\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "73ba3f3e-1c0a-41a7-8b50-2a7b2a0ee1ff", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "d162abb19fb34a39a99526088a34714d", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Loading checkpoint shards: 0%| | 0/8 [00:00 what is article 144 of indian " + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/u8c206d93f653f456d22b878722b9483/.local/lib/python3.9/site-packages/transformers/generation/utils.py:1421: UserWarning: You have modified the pretrained model configuration to control generation. This is a deprecated strategy to control generation and will be removed soon, in a future version. Please use and modify the model generation configuration (see https://huggingface.co/docs/transformers/generation_strategies#default-text-generation-configuration )\n", + " warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "law?\n", + "\n", + "Article 144 of the Indian Constitution is a provision that empowers the State to issue prohibitory orders in the interest of public order. It is also known as the \"Maintenance of Public Order\" provision. The State can impose restrictions on the right to freedom of speech, assembly, and movement under Article 19 of the Constitution in order to maintain public order. The orders issued under Article 144 can be preventive in nature, and are intended to prevent the occurrence of any untoward incidents or disturbances. The orders can be issued by the State Government or the District Magistrate, and can be in force for a specified period. The orders can also be extended to other areas if the situation so demands. The use of Article 144 is subject to certain conditions, such as the need for the orders to be reasonable, proportionate, and necessary in the circumstances. The orders must also be communicated to the public, and the affected persons must be given an opportunity to represent their views. 
The orders can be challenged in a court of law if they are deemed to be unreasonable or excessive.\n", + "Inference time: 124.3553466796875 seconds \n" + ] + } + ], + "source": [ + "from transformers import AutoTokenizer, TextStreamer\n", + "from transformers import AutoModelForCausalLM\n", + "import time\n", + "model_name = \"HuggingFaceH4/zephyr-7b-beta\" # Hugging Face model_id or local model\n", + "prompt = \"what is article 144 of indian law\"\n", + "\n", + "tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)\n", + "inputs = tokenizer(prompt, return_tensors=\"pt\").input_ids\n", + "streamer = TextStreamer(tokenizer)\n", + "start=time.time()\n", + "model = AutoModelForCausalLM.from_pretrained(model_name, load_in_8bit=False)\n", + "outputs = model.generate(inputs, streamer=streamer, max_new_tokens=300)\n", + "end=time.time()\n", + "inference_time=end-start\n", + "print(f\"Inference time: {inference_time} seconds \")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f0e6f9e9-c098-4537-8961-df649fa904f6", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "tensorflow-gpu", + "language": "python", + "name": "tensorflow-gpu" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.16" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/architects_of_future/Intel Inference model check/simple_llm_inference.ipynb b/architects_of_future/Intel Inference model check/simple_llm_inference.ipynb new file mode 100644 index 00000000..4fe742f5 --- /dev/null +++ b/architects_of_future/Intel Inference model check/simple_llm_inference.ipynb @@ -0,0 +1,812 @@ +{ + "cells": [ + { + "cell_type": "raw", + "id": "8b40b326-4549-4b2b-8ce1-a453fbaa7a19", + "metadata": {}, + "source": [ + "SPDX-License-Identifier: Apache-2.0\n", + "Copyright (c) 2023, Rahul Unnikrishnan Nair " + ] + }, + { + "cell_type": "markdown", + "id": "1d173d6a-a86d-441a-b36a-efed57814310", + "metadata": {}, + "source": [ + "---\n", + "**Simple LLM Inference: Playing with Language Models on Intel® Data Center Max Series GPUs**" + ] + }, + { + "cell_type": "markdown", + "id": "c86f2e84-54e8-4c1d-bfec-302f9dff577d", + "metadata": {}, + "source": [ + "Hello and welcome! Are you curious about how computers understand and generate human-like text? Do you want to play around with text generation without getting too technical? Then you've come to the right place.\n", + "\n", + "Large Language Models (LLMs) have a wide range of applications, but they can also be fun to experiment with. Here, we'll use some simple pre-trained models to explore text generation interactively.\n", + "\n", + "Powered by Intel® Data Center GPU Max 1100s, this notebook provides a hands-on experience that doesn't require deep technical knowledge. Whether you're a student, writer, educator, or just curious about AI, this guide is designed for you.\n", + "\n", + "Ready to try it out? 
Let's set up our environment and start exploring the world of text generation with LLMs!\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e47145a5-c2b2-4957-8ce0-51c0fb1bf9a5", + "metadata": {}, + "outputs": [], + "source": [ + "# Required packages, install if not installed (assume PyTorch* and Intel® Extension for PyTorch* is already present)\n", + "#import sys\n", + "#!conda install -y --quiet --prefix {sys.prefix} -c conda-forge \\\n", + "# accelerate==0.23.0 \\\n", + "# validators==0.22.0 \\\n", + "# transformers==4.32.1 \\\n", + "# sentencepiece \\\n", + "# pillow \\\n", + "# ipywidgets \\\n", + "# ipython > /dev/null && echo \"Installation successful\" || echo \"Installation failed\"\n", + "\n", + "import sys\n", + "import site\n", + "from pathlib import Path\n", + "\n", + "!echo \"Installation in progress...\"\n", + "!{sys.executable} -m pip install -U transformers==4.35.2 --no-warn-script-location > /dev/null && echo \"Installation successful\" || echo \"Installation failed\"\n", + "\n", + "def get_python_version():\n", + " return \"python\" + \".\".join(map(str, sys.version_info[:2]))\n", + "\n", + "def set_local_bin_path():\n", + " local_bin = str(Path.home() / \".local\" / \"bin\") \n", + " local_site_packages = str(\n", + " Path.home() / \".local\" / \"lib\" / get_python_version() / \"site-packages\"\n", + " )\n", + " sys.path.append(local_bin)\n", + " sys.path.insert(0, site.getusersitepackages())\n", + " sys.path.insert(0, sys.path.pop(sys.path.index(local_site_packages)))\n", + "\n", + "set_local_bin_path()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1abc93ee-117e-4d10-a97c-c26429b38159", + "metadata": {}, + "outputs": [], + "source": [ + "import logging\n", + "import os\n", + "import random\n", + "import re\n", + "\n", + "os.environ[\"SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS\"] = \"1\"\n", + "os.environ[\"ENABLE_SDP_FUSION\"] = \"1\"\n", + "import warnings\n", + "\n", + "# Suppress warnings for a cleaner output\n", + "warnings.filterwarnings(\"ignore\")\n", + "\n", + "import torch\n", + "import intel_extension_for_pytorch as ipex\n", + "\n", + "from transformers import AutoModelForCausalLM, AutoTokenizer\n", + "from transformers import LlamaTokenizer, LlamaForCausalLM\n", + "from transformers import BertTokenizer, BertForSequenceClassification\n", + "\n", + "from ipywidgets import VBox, HBox, Button, Dropdown, IntSlider, FloatSlider, Text, Output, Label, Layout\n", + "import ipywidgets as widgets\n", + "from ipywidgets import HTML\n", + "\n", + "\n", + "# random seed\n", + "if torch.xpu.is_available():\n", + " seed = 88\n", + " random.seed(seed)\n", + " torch.xpu.manual_seed(seed)\n", + " torch.xpu.manual_seed_all(seed)\n", + "\n", + "def select_device(preferred_device=None):\n", + " \"\"\"\n", + " Selects the best available XPU device or the preferred device if specified.\n", + "\n", + " Args:\n", + " preferred_device (str, optional): Preferred device string (e.g., \"cpu\", \"xpu\", \"xpu:0\", \"xpu:1\", etc.). 
If None, a random available XPU device will be selected or CPU if no XPU devices are available.\n", + "\n", + " Returns:\n", + " torch.device: The selected device object.\n", + " \"\"\"\n", + " try:\n", + " if preferred_device and preferred_device.startswith(\"cpu\"):\n", + " print(\"Using CPU.\")\n", + " return torch.device(\"cpu\")\n", + " if preferred_device and preferred_device.startswith(\"xpu\"):\n", + " if preferred_device == \"xpu\" or (\n", + " \":\" in preferred_device\n", + " and int(preferred_device.split(\":\")[1]) >= torch.xpu.device_count()\n", + " ):\n", + " preferred_device = (\n", + " None # Handle as if no preferred device was specified\n", + " )\n", + " else:\n", + " device = torch.device(preferred_device)\n", + " if device.type == \"xpu\" and device.index < torch.xpu.device_count():\n", + " vram_used = torch.xpu.memory_allocated(device) / (\n", + " 1024**2\n", + " ) # In MB\n", + " print(\n", + " f\"Using preferred device: {device}, VRAM used: {vram_used:.2f} MB\"\n", + " )\n", + " return device\n", + "\n", + " if torch.xpu.is_available():\n", + " device_id = random.choice(\n", + " range(torch.xpu.device_count())\n", + " ) # Select a random available XPU device\n", + " device = torch.device(f\"xpu:{device_id}\")\n", + " vram_used = torch.xpu.memory_allocated(device) / (1024**2) # In MB\n", + " print(f\"Selected device: {device}, VRAM used: {vram_used:.2f} MB\")\n", + " return device\n", + " except Exception as e:\n", + " print(f\"An error occurred while selecting the device: {e}\")\n", + " print(\"No XPU devices available or preferred device not found. Using CPU.\")\n", + " return torch.device(\"cpu\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "416381a3-6293-4701-a8fa-76f1402037dc", + "metadata": {}, + "source": [ + "---\n", + "**A Glimpse Into Text Generation with Language Models**\n", + "\n", + "If you're intrigued by how machines can generate human-like text, let's take a closer look at the underlying code. Even if you're not technically inclined, this section will provide a high-level understanding of how it all works.:\n", + "\n", + "- **Class Definition**: The `ChatBotModel` class is the core of our text generation. It handles the setup, optimization, and interaction with the LLM (Large Language Model).\n", + "\n", + "- **Initialization**: When you create an instance of this class, you can specify the model's path, the device to run on (defaulting to Intel's \"xpu\" device if available), and the data type. There's also an option to optimize the model for Intel GPUs using Intel Extension For PyTorch (IPEX).\n", + "\n", + "- **Input Preparation**: The `prepare_input` method ensures that the input doesn't exceed the maximum length and combines the previous text with the user input, if required.\n", + "\n", + "- **Output Generation**: The `gen_output` method takes the prepared input and several parameters controlling the generation process, like temperature, top_p, top_k, etc., and produces the text response.\n", + "\n", + "- **Warm-up**: Before the main interactions, the `warmup_model` method helps in \"warming up\" the model to make subsequent runs faster.\n", + "\n", + "- **Text Processing**: Several methods like `unique_sentences`, `remove_repetitions`, and `extract_bot_response` handle the text processing to ensure the generated text is readable and free from repetitions or unnecessary echoes.\n", + "\n", + "Feel free to explore the code and play around with different parameters. 
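\n", + "\n", + "For instance, once the `ChatBotModel` class from the next cell is defined and a model has loaded, the same flow can be driven without the chat widgets. Here is a minimal sketch, assuming an XPU (or CPU fallback) is available and the model loads from cache or the Hub; the prompt and parameter values are illustrative, and only methods described above are used:\n", + "\n", + "```python\n", + "# Hedged sketch: exercise the class directly instead of via ipywidgets.\n", + "bot = ChatBotModel(model_id_or_path=\"Writer/camel-5b-hf\")\n", + "input_ids = bot.prepare_input(previous_text=\"\", user_input=\"What is a tort?\")\n", + "output_ids = bot.gen_output(\n", + "    input_ids,\n", + "    temperature=0.1,\n", + "    top_p=0.95,\n", + "    top_k=40,\n", + "    num_beams=3,\n", + "    repetition_penalty=1.8,\n", + ")\n", + "raw = bot.tokenizer.decode(output_ids[0], skip_special_tokens=True)\n", + "# Post-process exactly as the chat loop does: strip the instruction\n", + "# template, then pull out the text after the \"Bot:\" prefix.\n", + "print(bot.extract_bot_response(bot.strip_response(raw)))\n", + "```\n", + "\n", + "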
Remember, this is a simple and interactive way to experiment with text generation. It's not a cutting-edge chatbot, but rather a playful tool to engage with language models. Enjoy the journey into the world of LLMs, using Intel® Data Center GPU Max 1100s!\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fa2f29e9-cd41-4605-aafc-f5aaaf440469", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "MODEL_CACHE_PATH = \"/home/common/data/Big_Data/GenAI/llm_models\"\n", + "class ChatBotModel:\n", + "    \"\"\"\n", + "    ChatBotModel is a class for generating responses based on text prompts using a pretrained model.\n", + "\n", + "    Attributes:\n", + "    - device: The device to run the model on. Default is \"xpu\" if available, otherwise \"cpu\".\n", + "    - model: The loaded model for text generation.\n", + "    - tokenizer: The loaded tokenizer for the model.\n", + "    - torch_dtype: The data type to use in the model.\n", + "    \"\"\"\n", + "\n", + "    def __init__(\n", + "        self,\n", + "        model_id_or_path: str = \"openlm-research/open_llama_3b_v2\",  # \"Writer/camel-5b-hf\",\n", + "        torch_dtype: torch.dtype = torch.bfloat16,\n", + "        optimize: bool = True,\n", + "    ) -> None:\n", + "        \"\"\"\n", + "        The initializer for the ChatBotModel class.\n", + "\n", + "        Parameters:\n", + "        - model_id_or_path: The identifier or path of the pretrained model.\n", + "        - torch_dtype: The data type to use in the model. Default is torch.bfloat16.\n", + "        - optimize: If True, ipex is used to optimize the model.\n", + "        \"\"\"\n", + "        self.torch_dtype = torch_dtype\n", + "        self.device = select_device(\"xpu\")\n", + "        self.model_id_or_path = model_id_or_path\n", + "        local_model_id = self.model_id_or_path.replace(\"/\", \"--\")\n", + "        local_model_path = os.path.join(MODEL_CACHE_PATH, local_model_id)\n", + "\n", + "        # Pick the autocast context that matches the selected device;\n", + "        # the device may be a plain string (e.g. \"xpu:0\") or a torch.device.\n", + "        if (\n", + "            self.device.startswith(\"xpu\")\n", + "            if isinstance(self.device, str)\n", + "            else self.device.type == \"xpu\"\n", + "        ):\n", + "            self.autocast = torch.xpu.amp.autocast\n", + "        else:\n", + "            self.autocast = torch.cpu.amp.autocast\n", + "        try:\n", + "            if \"llama\" in model_id_or_path:\n", + "                self.tokenizer = LlamaTokenizer.from_pretrained(local_model_path)\n", + "                self.model = (\n", + "                    LlamaForCausalLM.from_pretrained(\n", + "                        local_model_path,\n", + "                        low_cpu_mem_usage=True,\n", + "                        torch_dtype=self.torch_dtype,\n", + "                    )\n", + "                    .to(self.device)\n", + "                    .eval()\n", + "                )\n", + "            else:\n", + "                self.tokenizer = AutoTokenizer.from_pretrained(\n", + "                    local_model_path, trust_remote_code=True\n", + "                )\n", + "                self.model = (\n", + "                    AutoModelForCausalLM.from_pretrained(\n", + "                        local_model_path,\n", + "                        low_cpu_mem_usage=True,\n", + "                        trust_remote_code=True,\n", + "                        torch_dtype=self.torch_dtype,\n", + "                    )\n", + "                    .to(self.device)\n", + "                    .eval()\n", + "                )\n", + "        except (OSError, ValueError, EnvironmentError) as e:\n", + "            logging.info(\n", + "                f\"Tokenizer / model not found locally. 
Downloading tokenizer / model for {self.model_id_or_path} to cache...: {e}\"\n", + "            )\n", + "            if \"llama\" in model_id_or_path:\n", + "                self.tokenizer = LlamaTokenizer.from_pretrained(self.model_id_or_path)\n", + "                self.model = (\n", + "                    LlamaForCausalLM.from_pretrained(\n", + "                        self.model_id_or_path,\n", + "                        low_cpu_mem_usage=True,\n", + "                        torch_dtype=self.torch_dtype,\n", + "                    )\n", + "                    .to(self.device)\n", + "                    .eval()\n", + "                )\n", + "            else:\n", + "                self.tokenizer = AutoTokenizer.from_pretrained(\n", + "                    self.model_id_or_path, trust_remote_code=True\n", + "                )\n", + "                self.model = (\n", + "                    AutoModelForCausalLM.from_pretrained(\n", + "                        self.model_id_or_path,\n", + "                        low_cpu_mem_usage=True,\n", + "                        trust_remote_code=True,\n", + "                        torch_dtype=self.torch_dtype,\n", + "                    )\n", + "                    .to(self.device)\n", + "                    .eval()\n", + "                )\n", + "\n", + "        self.max_length = 256\n", + "\n", + "        if optimize:\n", + "            if hasattr(ipex, \"optimize_transformers\"):\n", + "                try:\n", + "                    ipex.optimize_transformers(self.model, dtype=self.torch_dtype)\n", + "                except Exception:\n", + "                    # Fall back to the generic optimizer if the transformers-specific path fails.\n", + "                    ipex.optimize(self.model, dtype=self.torch_dtype)\n", + "            else:\n", + "                ipex.optimize(self.model, dtype=self.torch_dtype)\n", + "\n", + "    def prepare_input(self, previous_text, user_input):\n", + "        \"\"\"Prepare the input for the model, ensuring it doesn't exceed the maximum length.\"\"\"\n", + "        response_buffer = 100\n", + "        user_input = (\n", + "            \"Below is an instruction that describes a task. \"\n", + "            \"Write a response that appropriately completes the request.\\n\\n\"\n", + "            f\"### Instruction:\\n{user_input}\\n\\n### Response:\")\n", + "        combined_text = previous_text + \"\\nUser: \" + user_input + \"\\nBot: \"\n", + "        input_ids = self.tokenizer.encode(\n", + "            combined_text, return_tensors=\"pt\", truncation=False\n", + "        )\n", + "        adjusted_max_length = self.max_length - response_buffer\n", + "        if input_ids.shape[1] > adjusted_max_length:\n", + "            input_ids = input_ids[:, -adjusted_max_length:]\n", + "        return input_ids.to(device=self.device)\n", + "\n", + "    def gen_output(\n", + "        self, input_ids, temperature, top_p, top_k, num_beams, repetition_penalty\n", + "    ):\n", + "        \"\"\"\n", + "        Generate the output text based on the given input IDs and generation parameters.\n", + "\n", + "        Args:\n", + "            input_ids (torch.Tensor): The input tensor containing token IDs.\n", + "            temperature (float): The temperature for controlling randomness in the output distribution.\n", + "                Higher values increase randomness, lower values make the generation more deterministic.\n", + "            top_p (float): The cumulative distribution function (CDF) threshold for Nucleus Sampling.\n", + "                Helps in controlling the trade-off between randomness and diversity.\n", + "            top_k (int): The number of highest probability vocabulary tokens to keep for top-k-filtering.\n", + "            num_beams (int): The number of beams for beam search. 
Controls the breadth of the search.\n", + "            repetition_penalty (float): The penalty applied for repeating tokens.\n", + "\n", + "        Returns:\n", + "            torch.Tensor: The generated output tensor.\n", + "        \"\"\"\n", + "        print(f\"Using max length: {self.max_length}\")\n", + "        with self.autocast(\n", + "            enabled=self.torch_dtype != torch.float32,\n", + "            dtype=self.torch_dtype,\n", + "        ):\n", + "            with torch.no_grad():\n", + "                output = self.model.generate(\n", + "                    input_ids,\n", + "                    pad_token_id=self.tokenizer.eos_token_id,\n", + "                    max_length=self.max_length,\n", + "                    temperature=temperature,\n", + "                    top_p=top_p,\n", + "                    top_k=top_k,\n", + "                    num_beams=num_beams,\n", + "                    repetition_penalty=repetition_penalty,\n", + "                )\n", + "        return output\n", + "\n", + "    def warmup_model(\n", + "        self, temperature, top_p, top_k, num_beams, repetition_penalty\n", + "    ) -> None:\n", + "        \"\"\"\n", + "        Warms up the model by generating a sample response.\n", + "        \"\"\"\n", + "        sample_prompt = \"\"\"A dialog, where User interacts with a helpful Bot.\n", + "        AI is helpful, kind, obedient, honest, and knows its own limits.\n", + "        User: Hello, Bot.\n", + "        Bot: Hello! How can I assist you today?\n", + "        \"\"\"\n", + "        input_ids = self.tokenizer(sample_prompt, return_tensors=\"pt\").input_ids.to(\n", + "            device=self.device\n", + "        )\n", + "        _ = self.gen_output(\n", + "            input_ids,\n", + "            temperature=temperature,\n", + "            top_p=top_p,\n", + "            top_k=top_k,\n", + "            num_beams=num_beams,\n", + "            repetition_penalty=repetition_penalty,\n", + "        )\n", + "\n", + "    def strip_response(self, generated_text):\n", + "        \"\"\"Remove \"### Response:\" from the string if it exists.\"\"\"\n", + "        match = re.search(r'### Response:(.*)', generated_text, re.S)\n", + "        if match:\n", + "            return match.group(1).strip()\n", + "        else:\n", + "            return generated_text\n", + "\n", + "    def unique_sentences(self, text: str) -> str:\n", + "        \"\"\"Deduplicate sentences while preserving their original order.\"\"\"\n", + "        sentences = text.split(\". \")\n", + "        if sentences[-1] and sentences[-1][-1] != \".\":\n", + "            sentences = sentences[:-1]\n", + "        # dict.fromkeys keeps the first occurrence of each sentence in order\n", + "        # (a plain set would scramble the sentence order).\n", + "        sentences = list(dict.fromkeys(sentences))\n", + "        return \". \".join(sentences) + \".\" if sentences else \"\"\n", + "\n", + "    def remove_repetitions(self, text: str, user_input: str) -> str:\n", + "        \"\"\"\n", + "        Remove repetitive sentences or phrases from the generated text and avoid echoing user's input.\n", + "\n", + "        Args:\n", + "            text (str): The input text with potential repetitions.\n", + "            user_input (str): The user's original input to check against echoing.\n", + "\n", + "        Returns:\n", + "            str: The processed text with repetitions and echoes removed.\n", + "        \"\"\"\n", + "        text = re.sub(re.escape(user_input), \"\", text, count=1).strip()\n", + "        text = self.unique_sentences(text)\n", + "        return text\n", + "\n", + "    def extract_bot_response(self, generated_text: str) -> str:\n", + "        \"\"\"\n", + "        Extract the first response starting with \"Bot:\" from the generated text.\n", + "\n", + "        Args:\n", + "            generated_text (str): The full generated text from the model.\n", + "\n", + "        Returns:\n", + "            str: The extracted response starting with \"Bot:\".\n", + "        \"\"\"\n", + "        prefix = \"Bot:\"\n", + "        generated_text = generated_text.replace(\"\\n\", \". 
\")\n", + " bot_response_start = generated_text.find(prefix)\n", + " if bot_response_start != -1:\n", + " response_start = bot_response_start + len(prefix)\n", + " end_of_response = generated_text.find(\"\\n\", response_start)\n", + " if end_of_response != -1:\n", + " return generated_text[response_start:end_of_response].strip()\n", + " else:\n", + " return generated_text[response_start:].strip()\n", + " return re.sub(r'^[^a-zA-Z0-9]+', '', generated_text)\n", + "\n", + " def interact(\n", + " self,\n", + " out: Output, # Output widget to display the conversation\n", + " with_context: bool = True,\n", + " temperature: float = 0.10,\n", + " top_p: float = 0.95,\n", + " top_k: int = 40,\n", + " num_beams: int = 3,\n", + " repetition_penalty: float = 1.80,\n", + " ) -> None:\n", + " \"\"\"\n", + " Handle the chat loop where the user provides input and receives a model-generated response.\n", + "\n", + " Args:\n", + " with_context (bool): Whether to consider previous interactions in the session. Default is True.\n", + " temperature (float): The temperature for controlling randomness in Boltzmann distribution.\n", + " Higher values increase randomness, lower values make the generation more deterministic.\n", + " top_p (float): The cumulative distribution function (CDF) threshold for Nucleus Sampling.\n", + " Helps in controlling the trade-off between randomness and diversity.\n", + " top_k (int): The number of highest probability vocabulary tokens to keep for top-k-filtering.\n", + " num_beams (int): The number of beams for beam search. Controls the breadth of the search.\n", + " repetition_penalty (float): The penalty applied for repeating tokens.\n", + " \"\"\"\n", + " previous_text = \"\"\n", + " \n", + " def display_user_input_widgets():\n", + " default_color = \"\\033[0m\"\n", + " user_color, user_icon = \"\\033[94m\", \"😀 \"\n", + " bot_color, bot_icon = \"\\033[92m\", \"🤖 \"\n", + " user_input_widget = Text(placeholder=\"Type your message here...\", layout=Layout(width='80%'))\n", + " send_button = Button(description=\"Send\", button_style = \"primary\", layout=Layout(width='10%'))\n", + " chat_spin = HTML(value = \"\")\n", + " spin_style = \"\"\"\n", + "
\n", + " \n", + " \"\"\"\n", + " display(HBox([chat_spin, user_input_widget, send_button, ]))\n", + " \n", + " def on_send(button):\n", + " nonlocal previous_text\n", + " send_button.button_style = \"warning\"\n", + " chat_spin.value = spin_style\n", + " orig_input = \"\"\n", + " user_input = user_input_widget.value\n", + " with out:\n", + " print(f\" {user_color}{user_icon}You: {user_input}{default_color}\")\n", + " if user_input.lower() == \"exit\":\n", + " return\n", + " if \"camel\" in self.model_id_or_path:\n", + " orig_input = user_input\n", + " user_input = (\n", + " \"Below is an instruction that describes a task. \"\n", + " \"Write a response that appropriately completes the request.\\n\\n\"\n", + " f\"### Instruction:\\n{user_input}\\n\\n### Response:\")\n", + " if with_context:\n", + " self.max_length = 256\n", + " input_ids = self.prepare_input(previous_text, user_input)\n", + " else:\n", + " self.max_length = 96\n", + " input_ids = self.tokenizer.encode(user_input, return_tensors=\"pt\").to(self.device)\n", + " \n", + " output_ids = self.gen_output(\n", + " input_ids,\n", + " temperature=temperature,\n", + " top_p=top_p,\n", + " top_k=top_k,\n", + " num_beams=num_beams,\n", + " repetition_penalty=repetition_penalty,\n", + " )\n", + " generated_text = self.tokenizer.decode(output_ids[0], skip_special_tokens=True)\n", + " generated_text = self.strip_response(generated_text)\n", + " generated_text = self.extract_bot_response(generated_text)\n", + " generated_text = self.remove_repetitions(generated_text, user_input)\n", + " send_button.button_style = \"success\"\n", + " chat_spin.value = \"\"\n", + "\n", + " with out:\n", + " if orig_input:\n", + " user_input = orig_input\n", + " print(f\" {bot_color}{bot_icon}Bot: {generated_text}{default_color}\") \n", + " if with_context:\n", + " previous_text += \"\\nUser: \" + user_input + \"\\nBot: \" + generated_text\n", + " user_input_widget.value = \"\" \n", + " display_user_input_widgets()\n", + " send_button.on_click(on_send)\n", + " display_user_input_widgets()" + ] + }, + { + "cell_type": "markdown", + "id": "d3a61ad2-5155-4b07-9f10-d9d18f252e8f", + "metadata": {}, + "source": [ + "---\n", + "**Setting Up the Interactive Text Generation Interface**\n", + "\n", + "In the next section, we'll create an interactive text generation interface right here in this notebook. This will enable you to select a model, provide a prompt, and tweak various parameters without touching the code itself.\n", + "\n", + "- **Model Selection**: Choose from available pre-trained models or enter a custom model from the HuggingFace Hub.\n", + "- **Interaction Mode**: Decide whether to interact with or without context, allowing the model to remember previous interactions or treat each input independently.\n", + "- **Temperature**: Adjust this parameter to control the randomness in text generation. Higher values increase creativity; lower values make the generation more deterministic.\n", + "- **Top_p, Top_k**: Play with these parameters to influence the diversity and quality of the generated text.\n", + "- **Number of Beams**: Control the breadth of the search in text generation.\n", + "- **Repetition Penalty**: Modify this to prevent or allow repeated phrases and sentences.\n", + "\n", + "Once you've set your preferences, you can start the interaction and even reset or reload the model to try different settings. 
Let's set this up and explore the playful world of text generation using Intel® Data Center GPU Max 1100s!\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "42264502-ffdd-47e1-af1e-f7407c4b365f", + "metadata": {}, + "outputs": [], + "source": [ + "model_cache = {}\n", + "\n", + "from ipywidgets import HTML\n", + "def interact_with_llm():\n", + " models = [\"Writer/camel-5b-hf\", \n", + " \"openlm-research/open_llama_3b_v2\",\n", + " \"Intel/neural-chat-7b-v3\", \n", + " \"Intel/neural-chat-7b-v3-1\", # https://huggingface.co/Intel/neural-chat-7b-v3-1 - checkout the prompting template on the site to get better response.\n", + " \"HuggingFaceH4/zephyr-7b-beta\", \n", + " \"tiiuae/falcon-7b\"\n", + " ]\n", + " interaction_modes = [\"Interact with context\", \"Interact without context\"]\n", + " model_dropdown = Dropdown(options=models, value=models[0], description=\"Model:\")\n", + " interaction_mode = Dropdown(options=interaction_modes, value=interaction_modes[1], description=\"Interaction:\")\n", + " temperature_slider = FloatSlider(value=0.71, min=0, max=1, step=0.01, description=\"Temperature:\")\n", + " top_p_slider = FloatSlider(value=0.95, min=0, max=1, step=0.01, description=\"Top P:\")\n", + " top_k_slider = IntSlider(value=40, min=0, max=100, step=1, description=\"Top K:\")\n", + " num_beams_slider = IntSlider(value=3, min=1, max=10, step=1, description=\"Num Beams:\")\n", + " repetition_penalty_slider = FloatSlider(value=1.80, min=0, max=2, step=0.1, description=\"Rep Penalty:\")\n", + " \n", + " out = Output() \n", + " left_panel = VBox([model_dropdown, interaction_mode], layout=Layout(margin=\"0px 20px 10px 0px\"))\n", + " right_panel = VBox([temperature_slider, top_p_slider, top_k_slider, num_beams_slider, repetition_penalty_slider],\n", + " layout=Layout(margin=\"0px 0px 10px 20px\"))\n", + " user_input_widgets = HBox([left_panel, right_panel], layout=Layout(margin=\"0px 50px 10px 0px\"))\n", + " spinner = HTML(value=\"\")\n", + " start_button = Button(description=\"Start Interaction!\", button_style=\"primary\")\n", + " start_button_spinner = HBox([start_button, spinner])\n", + " start_button_spinner.layout.margin = '0 auto'\n", + " display(user_input_widgets)\n", + " display(start_button_spinner)\n", + " display(out)\n", + " \n", + " def on_start(button):\n", + " start_button.button_style = \"warning\"\n", + " start_button.description = \"Loading...\"\n", + " spinner.value = \"\"\"\n", + "
\n", + " \n", + " \"\"\"\n", + " out.clear_output()\n", + " with out:\n", + " print(\"\\nSetting up the model, please wait...\")\n", + " #out.clear_output()\n", + " model_choice = model_dropdown.value\n", + " with_context = interaction_mode.value == interaction_modes[0]\n", + " temperature = temperature_slider.value\n", + " top_p = top_p_slider.value\n", + " top_k = top_k_slider.value\n", + " num_beams = num_beams_slider.value\n", + " repetition_penalty = repetition_penalty_slider.value\n", + " model_key = (model_choice, \"xpu\")\n", + " if model_key not in model_cache:\n", + " model_cache[model_key] = ChatBotModel(model_id_or_path=model_choice)\n", + " bot = model_cache[model_key]\n", + " #if model_key not in model_cache:\n", + " # bot.warmup_model(\n", + " # temperature=temperature,\n", + " # top_p=top_p,\n", + " # top_k=top_k,\n", + " # num_beams=num_beams,\n", + " # repetition_penalty=repetition_penalty,\n", + " # )\n", + " \n", + " with out:\n", + " start_button.button_style = \"success\"\n", + " start_button.description = \"Refresh\"\n", + " spinner.value = \"\"\n", + " print(\"Ready!\")\n", + " print(\"\\nNote: This is a demonstration using pretrained models which were not fine-tuned for chat.\")\n", + " print(\"If the bot doesn't respond, try clicking on refresh.\\n\")\n", + " try:\n", + " with out:\n", + " bot.interact(\n", + " with_context=with_context,\n", + " out=out,\n", + " temperature=temperature,\n", + " top_p=top_p,\n", + " top_k=top_k,\n", + " num_beams=num_beams,\n", + " repetition_penalty=repetition_penalty,\n", + " )\n", + " except Exception as e:\n", + " with out:\n", + " print(f\"An error occurred: {e}\")\n", + "\n", + " start_button.on_click(on_start)\n" + ] + }, + { + "cell_type": "markdown", + "id": "5d7322e1-abae-4a2a-9381-453dc0cf0c23", + "metadata": {}, + "source": [ + "---\n", + "**Let's Dive In and Have Some Fun with LLM Models!**\n", + "\n", + "Ready for a playful interaction with some interesting LLM models? The interface below lets you choose from different models and settings. Just select your preferences, click the \"Start Interaction!\" button, and you're ready to chat.\n", + "\n", + "You can ask questions, make statements, or simply explore how the model responds to different inputs. It's a friendly way to get acquainted with AI and see what it has to say.\n", + "\n", + "Remember, this is all in good fun, and the models are here to engage with you. So go ahead, start a conversation, and enjoy the interaction!" 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b754474c-8a98-4d55-bb69-cd98a8c209e3", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "interact_with_llm()" + ] + }, + { + "cell_type": "markdown", + "id": "2d0d5473-e7fe-4de0-a8c0-f49ba924f35d", + "metadata": {}, + "source": [ + "## Language Models Disclaimer and Information\n", + "\n", + "### Camel-5B\n", + "- **Model card:** [Camel-5B](https://huggingface.co/Writer/camel-5b-hf)\n", + "- **License:** Apache 2.0\n", + "- **Reference:**\n", + "  ```bibtex\n", + "  @misc{Camel,\n", + "    author = {Writer Engineering team},\n", + "    title = {{Camel-5B InstructGPT}},\n", + "    howpublished = {\\url{https://dev.writer.com}},\n", + "    year = 2023,\n", + "    month = April\n", + "  }\n", + "  ```\n", + "\n", + "### OpenLLaMA 3b v2\n", + "- **Model card:** [OpenLLaMA 3b v2](https://huggingface.co/openlm-research/open_llama_3b_v2)\n", + "- **License:** Apache 2.0\n", + "- **References:**\n", + "  ```bibtex\n", + "  @software{openlm2023openllama,\n", + "    author = {Geng, Xinyang and Liu, Hao},\n", + "    title = {OpenLLaMA: An Open Reproduction of LLaMA},\n", + "    month = May,\n", + "    year = 2023,\n", + "    url = {https://github.com/openlm-research/open_llama}\n", + "  }\n", + "  @software{together2023redpajama,\n", + "    author = {Together Computer},\n", + "    title = {RedPajama-Data: An Open Source Recipe to Reproduce LLaMA training dataset},\n", + "    month = April,\n", + "    year = 2023,\n", + "    url = {https://github.com/togethercomputer/RedPajama-Data}\n", + "  }\n", + "  @article{touvron2023llama,\n", + "    title={Llama: Open and efficient foundation language models},\n", + "    author={Touvron, Hugo and Lavril, Thibaut and Izacard, Gautier and Martinet, Xavier and Lachaux, Marie-Anne and Lacroix, Timoth{\\'e}e and Rozi{\\`e}re, Baptiste and Goyal, Naman and Hambro, Eric and Azhar, Faisal and others},\n", + "    journal={arXiv preprint arXiv:2302.13971},\n", + "    year={2023}\n", + "  }\n", + "  ```\n", + "### Falcon 7B\n", + "\n", + "- **Model card:** [Falcon 7B](https://huggingface.co/tiiuae/falcon-7b)\n", + "- **License:** Apache 2.0\n", + "- **References:**\n", + "  ```bibtex\n", + "  @article{falcon40b,\n", + "    title = {{Falcon-40B}: an open large language model with state-of-the-art performance},\n", + "    author = {Almazrouei, Ebtesam and Alobeidli, Hamza and Alshamsi, Abdulaziz and Cappelli, Alessandro and Cojocaru, Ruxandra and Debbah, Merouane and Goffinet, Etienne and Heslow, Daniel and Launay, Julien and Malartic, Quentin and Noune, Badreddine and Pannier, Baptiste and Penedo, Guilherme},\n", + "    year={2023}\n", + "  }\n", + "  ```\n", + "### Zephyr 7B\n", + "\n", + "- **Model card:** [Zephyr 7B](https://huggingface.co/HuggingFaceH4/zephyr-7b-beta)\n", + "- **License:** MIT\n", + "- **References:**\n", + "  ```bibtex\n", + "  @misc{alignment_handbook2023,\n", + "    author = {Lewis Tunstall and Edward Beeching and Nathan Lambert and Nazneen Rajani and Alexander M. 
Rush and Thomas Wolf},\n", + " title = {The Alignment Handbook},\n", + " year = {2023},\n", + " publisher = {GitHub},\n", + " journal = {GitHub repository},\n", + " howpublished = {\\url{https://github.com/huggingface/alignment-handbook}}\n", + " }\n", + " ```\n", + "### Neural Chat 7b\n", + "- **Model card:** [Neural Chat](https://huggingface.co/Intel/neural-chat-7b-v3)\n", + "- **License:** Apache 2.0\n", + "\n", + "### Disclaimer for Using Large Language Models\n", + "\n", + "Please be aware that while Large Language Models like Camel-5B and OpenLLaMA 3b v2 are powerful tools for text generation, they may sometimes produce results that are unexpected, biased, or inconsistent with the given prompt. It's advisable to carefully review the generated text and consider the context and application in which you are using these models.\n", + "\n", + "Usage of these models must also adhere to the licensing agreements and be in accordance with ethical guidelines and best practices for AI. If you have any concerns or encounter issues with the models, please refer to the respective model cards and documentation provided in the links above.\n", + "\n", + "To the extent that any public or non-Intel datasets or models are referenced by or accessed using these materials those datasets or models are provided by the third party indicated as the content source. Intel does not create the content and does not warrant its accuracy or quality. By accessing the public content, or using materials trained on or with such content, you agree to the terms associated with that content and that your use complies with the applicable license.\n", + "\n", + " \n", + "Intel expressly disclaims the accuracy, adequacy, or completeness of any such public content, and is not liable for any errors, omissions, or defects in the content, or for any reliance on the content. Intel is not liable for any liability or damages relating to your use of public content.\n", + "\n", + "Intel’s provision of these resources does not expand or otherwise alter Intel’s applicable published warranties or warranty disclaimers for Intel products or solutions, and no additional obligations, indemnifications, or liabilities arise from Intel providing such resources. 
Intel reserves the right, without notice, to make corrections, enhancements, improvements, and other changes to its materials.\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "pytorch-gpu", + "language": "python", + "name": "pytorch-gpu" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.16" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/architects_of_future/Metrics/Screenshot 2023-12-06 154842.png b/architects_of_future/Metrics/Screenshot 2023-12-06 154842.png new file mode 100644 index 00000000..4eb26d01 Binary files /dev/null and b/architects_of_future/Metrics/Screenshot 2023-12-06 154842.png differ diff --git a/architects_of_future/Metrics/download (1).png b/architects_of_future/Metrics/download (1).png new file mode 100644 index 00000000..5130201f Binary files /dev/null and b/architects_of_future/Metrics/download (1).png differ diff --git a/architects_of_future/Metrics/download (2).png b/architects_of_future/Metrics/download (2).png new file mode 100644 index 00000000..41a4e391 Binary files /dev/null and b/architects_of_future/Metrics/download (2).png differ diff --git a/architects_of_future/Metrics/download.png b/architects_of_future/Metrics/download.png new file mode 100644 index 00000000..eb97ca23 Binary files /dev/null and b/architects_of_future/Metrics/download.png differ diff --git a/architects_of_future/Metrics/img.jpeg b/architects_of_future/Metrics/img.jpeg new file mode 100644 index 00000000..94f2c9c7 Binary files /dev/null and b/architects_of_future/Metrics/img.jpeg differ diff --git a/architects_of_future/README.md b/architects_of_future/README.md new file mode 100644 index 00000000..6c4f52dc --- /dev/null +++ b/architects_of_future/README.md @@ -0,0 +1,165 @@
+# Intel-Hackathon
+Team Name: Architects of Future
+Team Leader Email: irul.mathi1@wipro.com
+Enlightening Justice: Leveraging an LLM and the AI Analytics Toolkit for legal support in the Intel oneAPI AI Analytics Toolkit Hackathon
+Trained models:
+
+```
+git lfs install
+git clone https://huggingface.co/sriramahesh2000/sample2
+```
+```
+git lfs install
+git clone https://huggingface.co/sriramahesh2000/simple1
+```
+
+# Problem Statement
+For legal professionals, staying updated with the latest laws and judgments can be a challenging task. According to LexisNexis research, nearly 65 percent of a lawyer's time is dedicated solely to legal research on relevant laws. Additionally, grappling with lengthy verdicts and chargesheets is a time-consuming and arduous process. The preparation of legal documents is also a taxing task that demands meticulous attention. In response to these challenges, we have undertaken a project to develop a smart legal Large Language Model (LLM) fine-tuned on legal data, capable of addressing the aforementioned issues. This article provides an in-depth exploration of our project, highlighting the key components and technologies that enabled an effective solution.
+
+# Intel oneAPI AI Analytics Toolkit - A Boon for Developers ![image](https://github.com/Sriram-code/Intel-Hackathon/assets/75485469/c4da56ab-906a-4aa3-b3cd-47f93e3f7b59)
+
+The main goal of our project was to fine-tune a large language model on legal datasets. The aim was for the model to assimilate recent changes, enabling it to provide accurate guidance to lawyers by extracting relevant laws and facilitating a comprehensive understanding of extensive documents, with the ability to summarize key points effectively. During the training process, we were impressed by the efficiency of the Intel Developer Cloud, particularly the performance of the AI Analytics Toolkit. Intel's optimizations for pandas and NumPy enhanced processing speed well beyond our expectations, and the efficiency of quantization with OpenVINO NNCF was a pleasant surprise, contributing to faster inference.
+
+# Description
+We employed the Zephyr-7b-beta model, which surpasses many larger models of its kind in performance. Despite its enhanced capabilities, controlling its proclivity for hallucinations proved challenging. Extensive training was conducted on a substantial synthetic dataset gathered from diverse sources, including large-language-model completion datasets, open-source information, and legal databases. This training equipped the model with comprehensive knowledge of Indian laws, recent developments, significant judgments, and more.
+
+# Intel AI Analytics Toolkit in data preprocessing
+The synthetically collected dataset had to be extensively preprocessed before being sent to the LLM for training. Here we used Intel-optimized pandas and NumPy, which sped up preprocessing many times over and made even CPU-only computation powerful: the program now uses all CPU cores instead of leaving them idle. A change of just one line improved our efficiency manyfold.
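+
+To picture that one-line change: a minimal sketch, assuming the Intel Distribution of Modin that ships with the AI Analytics Toolkit is installed (the file and column names are placeholders, not our actual dataset):
+
+```python
+import modin.pandas as pd  # drop-in pandas replacement that spreads work across all CPU cores
+import numpy as np         # NumPy from the Intel Distribution for Python is oneMKL-accelerated
+
+df = pd.read_csv("legal_dataset.csv")            # placeholder path; CSV parsing runs in parallel
+df["text"] = df["text"].str.strip().str.lower()  # string cleanup is distributed across cores
+print(np.mean(df["text"].str.len().to_numpy()))  # quick length statistics over the corpus
+```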
+# Intel AI Analytics Toolkit in training
+In the training process, we utilized the same foundational model, enriching it with extensive knowledge of Indian laws and crucial legal cases. This base model then underwent separate training for three distinct tasks: answering legal questions, summarizing legal documents, and generating new legal content.
+We experimented with different models such as Mistral 7b, Llama2 13b, Flan-T5, and Zephyr 7b; the code used for fine-tuning these models on both IDC and Colab is attached. Some did not produce better results, and some crashed on IDC because of model and dataset size. We finally decided to fine-tune a Zephyr 7B beta quantized model in GPTQ format, as GGML-format models are not trainable and training the entire model was impractical at that late hour.
+Intel-optimized PyTorch significantly improved our training code. Despite the unfortunate loss of our trained model due to a system crash on IDC, the evident reduction in training loss underscored the success of our efforts. The step-by-step guidance for fine-tuning the LLM through PEFT LoRA proved exceptionally beneficial.
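+
+A hedged sketch of the shape of that setup, not our exact training script (the LoRA hyperparameters below are illustrative; `ipex.optimize` is the one-line Intel Extension for PyTorch hook mentioned above):
+
+```python
+import torch
+import intel_extension_for_pytorch as ipex
+from transformers import AutoModelForCausalLM
+from peft import LoraConfig, get_peft_model
+
+base = AutoModelForCausalLM.from_pretrained("HuggingFaceH4/zephyr-7b-beta")
+
+# PEFT LoRA: train small adapter matrices instead of all 7B parameters
+lora = LoraConfig(r=16, lora_alpha=32, lora_dropout=0.05,
+                  target_modules=["q_proj", "v_proj"], task_type="CAUSAL_LM")
+model = get_peft_model(base, lora)
+model.train()
+
+optimizer = torch.optim.AdamW(model.parameters(), lr=2e-4)
+# Intel Extension for PyTorch fuses ops and enables bf16 paths on supported CPUs
+model, optimizer = ipex.optimize(model, optimizer=optimizer, dtype=torch.bfloat16)
+```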
+# Post-training Quantization ![image](https://github.com/Sriram-code/Intel-Hackathon/assets/75485469/2574a6b1-9b54-470c-a444-24eb0633768d)
+Running inference with a large language model poses challenges, and quantization with the OpenVINO Neural Network Compression Framework (NNCF) proves to be an excellent solution. The detailed steps outlined in notebook 254, available in the training folder, were instrumental for post-training quantization. In a trial run, we applied this method to the stock Zephyr 7b beta model without further training. The model was successfully converted to INT8 format using only the CPU, resulting in a streamlined 6 GB bin-sized model, and the transformation significantly accelerated inference without any discernible drop in quality. It was later stated on Discord that use of the OpenVINO toolkit is prohibited, so this model is not used in the final submission; the code used for quantizing the model on IDC is attached.
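+
+For reference, the weight-compression flow from notebook 254 boils down to a few lines. This is a sketch only, not code we ship; the exact arguments can differ across optimum-intel/NNCF versions:
+
+```python
+from optimum.intel import OVModelForCausalLM
+
+# export=True converts the PyTorch checkpoint to OpenVINO IR on the fly;
+# load_in_8bit=True asks NNCF to compress the weights to INT8 during export
+ov_model = OVModelForCausalLM.from_pretrained(
+    "HuggingFaceH4/zephyr-7b-beta", export=True, load_in_8bit=True)
+ov_model.save_pretrained("zephyr-7b-beta-int8-ov")  # writes the compressed .bin/.xml pair
+```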
+# Experiments
+Various models, including Llama2, Flan-T5, Mistral 7b, and Zephyr 7b, were explored for summarization and data generation. Despite several challenges, the Zephyr 7b model emerged as the preferred choice due to its superior performance compared to other models of similar size.
+
+# Use case of Intel Developer Cloud
+The Intel Developer Cloud proved to be an excellent platform, offering access to powerful CPUs and high-speed internet and thereby making the whole process remarkably swift. This challenges the misconception that LLM training necessitates GPUs. The experimentation phase demonstrated that faster inferencing and training are achievable with different models on this platform.
+
+Unfortunately, at the last moment the session disconnected while we were training the model on the actual data, which prevented us from using that run at present; the code and screenshots are attached, and the model was trained on another platform as suggested by the Intel team.
+
+![image](https://github.com/Sriram-code/Intel-Hackathon/assets/75485469/ddbbc853-fea6-4e7f-b628-13de9982fe9d)
+
+# Final output
+1. Smart Legal Companion
+
+Our model is now proficient in addressing inquiries related to Indian law, referencing crucial legal judgments, and comprehending and elucidating the nuances inherent in various laws and acts. Notably, it achieves this with significantly reduced inference time, providing efficient and accurate responses. The notebooks named "Simple inferencing with LLM" and "simple RAG chatbot" in the training notebooks proved very helpful while performing these activities.
+
+Thanks to the Intel AI Analytics Toolkit, inferencing is now much easier and faster than before; a change of only a few lines improved performance manyfold:
+
+```python
+from intel_extension_for_transformers.neural_chat import build_chatbot
+chatbot = build_chatbot()
+response = chatbot.predict("Tell me about Intel Xeon Scalable Processors.")
+```
+
+```python
+from transformers import AutoTokenizer, TextStreamer
+from intel_extension_for_transformers.transformers import AutoModelForCausalLM
+model_name = "HuggingFaceH4/zephyr-7b-beta"
+prompt = "Once upon a time, there existed a little girl,"
+
+tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+inputs = tokenizer(prompt, return_tensors="pt").input_ids
+streamer = TextStreamer(tokenizer)
+
+model = AutoModelForCausalLM.from_pretrained(model_name, load_in_8bit=True)
+outputs = model.generate(inputs, streamer=streamer, max_new_tokens=500)
+```
+
+These small changes improved performance considerably.
+
+2. Chat with PDF
+
+Introducing an advanced feature of our application: the capability to streamline the extraction and analysis of information from lengthy and intricate PDF documents. Users can effortlessly upload their documents, and the system intelligently parses the content, breaking it down into manageable chunks. These chunks are then embedded and stored in vector form, creating a sophisticated knowledge repository.
+When users pose questions, the application retrieves the pertinent content chunks and feeds them into the Language Model (LLM) to generate accurate and contextually relevant responses. This approach not only saves users time and effort but also keeps the LLM grounded in the latest and most relevant information.
+Furthermore, the application is optimized with Intel libraries, which improves performance and efficiency and positions it for seamless adaptation to future advancements. As we strive for continuous improvement, our roadmap includes voice recognition: users will be able to interact with the application through voice commands and receive responses in a natural, conversational manner, transforming the application into a true companion for legal professionals.
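+
+Condensed, the pipeline looks like the sketch below (the full version lives in `hackathon/done.py`); the PDF path, chunk size, and question are illustrative:
+
+```python
+from langchain.document_loaders import PyPDFLoader
+from langchain.text_splitter import CharacterTextSplitter
+from langchain.embeddings import HuggingFaceEmbeddings
+from langchain.vectorstores import FAISS
+from langchain.chains.question_answering import load_qa_chain
+from langchain.llms import HuggingFaceHub
+
+docs = PyPDFLoader("COI.pdf").load()                        # 1. parse the uploaded PDF
+chunks = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0).split_documents(docs)
+db = FAISS.from_documents(chunks, HuggingFaceEmbeddings())  # 2. embed and index the chunks
+llm = HuggingFaceHub(repo_id="HuggingFaceH4/zephyr-7b-beta",
+                     model_kwargs={"temperature": 0.7, "max_length": 1024})
+chain = load_qa_chain(llm, chain_type="stuff")              # 3. stuff retrieved chunks into the prompt
+question = "What does Article 21 guarantee?"
+print(chain.run(input_documents=db.similarity_search(question), question=question))
+```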
+3. Advanced Document Summarization System
+
+Here the model is fine-tuned on data and summarized text derived from the quantized Zephyr-7b-beta model. As a result, it can produce a comprehensive summary of text spanning up to approximately 3,000 words while retaining all crucial points, as confirmed through GPT-4 evaluation.
+The fine-tuning code and dataset are attached.
+
+4. Legal Document Generator
+
+Our model successfully produces one-page legal documents, such as Power of Attorney and contract documents, in the correct format. It was trained on a dataset of prompts and their corresponding final documents. While there is ample room for further enhancement, refining the fine-tuning process holds promise for superior results.
+The fine-tuning code and dataset are attached.
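+
+Both fine-tuned checkpoints expect the Zephyr-style chat template built by the `process_data_sample*` helpers in `hackathon/done.py`. A minimal sketch (the instruction text below is only an example):
+
+```python
+def build_prompt(instruction: str) -> str:
+    # <|system|> / <|user|> / <|assistant|> tags mark the roles for Zephyr
+    return ("<|system|>\n"
+            "You are a legal document generator and always produce a complete, "
+            "correctly formatted document.\n"
+            f"<|user|>\n{instruction}\n<|assistant|>\n")
+
+prompt = build_prompt("Draft a one-page Power of Attorney under Indian law.")
+# The answer is whatever the model emits after the final <|assistant|> tag,
+# so the app splits the generated text on "<|assistant|>" and keeps the tail.
+```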
+
+# Future scope
+
+1. To enhance performance with increased computational capacity, we aim to construct an expansive dataset for the use cases above. This augmented dataset will serve as the foundation for retraining a more robust language model, with the goal of markedly better legal document generation.
+
+2. We retrained the model using Intel frameworks and incorporated quantization with NNCF. This approach yielded improved results, showcasing the model's enhanced performance. However, our session expired before we could save that progress; even so, the discernible gains in model performance underscored the effectiveness of the adopted methodologies.
+
+3. To enhance the model's hardware versatility, we aim to achieve GPU independence by integrating Intel-optimized libraries such as pandas and NumPy into the frontend. Compatibility with Intel frameworks at the frontend lets the model operate seamlessly across different hardware configurations, improving both accessibility and performance.
+
+4. To enhance the document-creation workflow, we are incorporating voice capability for an improved user experience, along with automatic printing and document validation, to streamline the entire process, reduce manual effort, and ensure the accuracy and completeness of generated documents.
+
+5. To ensure widespread accessibility and future usability, the team plans to deploy the application in the cloud, giving legal professionals and lawyers across India seamless access. Cloud deployment not only enhances scalability but also fosters collaboration, enabling legal practitioners to use the tool efficiently and contribute to the broader legal community.
+
+# Learnings and Insight
+
+1. Specialized NLP focus:
+   - Expanding expertise in NLP, specifically in question answering and text generation.
+
+2. End-to-end legal assistant application:
+   - Training the Large Language Model (LLM) for a comprehensive legal assistant application.
+   - Enabling simultaneous capabilities for text generation and question answering.
+
+3. Framework exploration:
+   - Investigating different frameworks and fine-tuning methods.
+   - Experimenting with models such as Llama2 13b, Flan-T5, and Mistral 7b.
+   - Identifying compatibility issues; only the quantized Zephyr 7b model proved suitable for training with the available dataset.
+
+4. Intel technologies integration:
+   - Acquiring knowledge of Intel technologies, specifically IDC and oneAPI.
+   - Experimenting with features such as CPU quantization and Intel-optimized fine-tuning.
+
+# Future Application Enhancement for Intel
+- Recognizing the prospective benefits of integrating Intel's features in future iterations.
+- Envisaging higher end-to-end application performance through the strategic application of recently acquired insights and technologies.
+- Currently, our trained models are quantized to the GPTQ format for optimized performance, which requires GPUs. Looking ahead, we may instead quantize them to GGML or OpenVINO formats, enabling efficient inferencing on CPU resources alone.
+
+# Tech stack used
+- Streamlit
+- IDC Jupyter Notebook, Intel AI Analytics Toolkit
+- Models: T5, Zephyr
+
+# Conclusion
+
+In conclusion, "Enlightening Justice" not only showcases the transformative power of AI and the Intel oneAPI AI Analytics Toolkit in the legal domain but also highlights the resilience of the team in overcoming challenges. The successful creation of a Smart Legal Companion, an Advanced Document Summarization System, and a Legal Document Generator underscores the project's positive impact on legal professionals. Despite setbacks, the project's adaptability and forward-thinking approach ensure a promising trajectory for future advancements, marking a significant step toward revolutionizing legal support through cutting-edge technology and innovative solutions.
+
+# Quick Steps
+
+Required installation:
+
+```
+pip install faiss-cpu streamlit langchain huggingface_hub sentence_transformers pypdf peft streamlit_option_menu auto-gptq optimum diffusers
+```
+
+Clone the repository:
+
+```
+git clone https://github.com/Sriram-code/Intel-Hackathon
+```
+
+# Application
+Built using Streamlit:
+
+```
+streamlit run done.py
+```
+Set the path of done.py from the hackathon folder.
+
+IDC remote SSH in VS Code: connect to the IDC remote server from VS Code using the Remote-SSH plugin.
+
+Tunnel the instance using ngrok, then run the Streamlit app to launch the instance.
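+
+A typical launch sequence looks like this (ports are illustrative; 8501 is Streamlit's default):
+
+```
+# on the IDC instance, from the hackathon folder
+streamlit run done.py --server.port 8501
+
+# in a second shell, expose the app publicly through ngrok
+ngrok http 8501
+```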
diff --git a/architects_of_future/db/index.faiss b/architects_of_future/db/index.faiss new file mode 100644 index 00000000..2140cf04 Binary files /dev/null and b/architects_of_future/db/index.faiss differ diff --git a/architects_of_future/db/index.pkl b/architects_of_future/db/index.pkl new file mode 100644 index 00000000..9b6de54c Binary files /dev/null and b/architects_of_future/db/index.pkl differ diff --git a/architects_of_future/hackathon/COI...pdf b/architects_of_future/hackathon/COI...pdf new file mode 100644 index 00000000..f77c0a58 Binary files /dev/null and b/architects_of_future/hackathon/COI...pdf differ diff --git a/architects_of_future/hackathon/about.png b/architects_of_future/hackathon/about.png new file mode 100644 index 00000000..73a8529c Binary files /dev/null and b/architects_of_future/hackathon/about.png differ diff --git a/architects_of_future/hackathon/appdocs.png b/architects_of_future/hackathon/appdocs.png new file mode 100644 index 00000000..df32454d Binary files /dev/null and b/architects_of_future/hackathon/appdocs.png differ diff --git a/architects_of_future/hackathon/book.png b/architects_of_future/hackathon/book.png new file mode 100644 index 00000000..bcc2fe49 Binary files /dev/null and b/architects_of_future/hackathon/book.png differ diff --git a/architects_of_future/hackathon/bot.png b/architects_of_future/hackathon/bot.png new file mode 100644 index 00000000..9395dbf5 Binary files /dev/null and b/architects_of_future/hackathon/bot.png differ diff --git a/architects_of_future/hackathon/config.toml b/architects_of_future/hackathon/config.toml new file mode 100644 index 00000000..a6e205a5 --- /dev/null +++ b/architects_of_future/hackathon/config.toml @@ -0,0 +1,5 @@
+[theme]
+base="light"
+primaryColor="#001e3d"
+secondaryBackgroundColor="#dbdbe2"
+textColor="#001e3d"
diff --git a/architects_of_future/hackathon/done.py b/architects_of_future/hackathon/done.py new file mode 100644 index 00000000..6432390b --- /dev/null +++ b/architects_of_future/hackathon/done.py @@ -0,0 +1,574 @@
+
+# from huggingface_hub import hf_hub_download
+# import pandas as pd
+# import pydeck as pdk
+# from langchain.document_loaders import PyPDFLoader, OnlinePDFLoader
+# import textwrap
+# from langchain.text_splitter import CharacterTextSplitter
+import time
+# import requests
+# import numpy as np
+# import fitz
+import streamlit as st
+import os
+from langchain.embeddings import HuggingFaceEmbeddings
+from langchain.vectorstores import FAISS
+from langchain.chains.question_answering import load_qa_chain
+from langchain.llms import HuggingFaceHub
+import torch
+from transformers import pipeline
+from peft import AutoPeftModelForCausalLM
+from transformers import GenerationConfig
+from transformers import AutoTokenizer
+from streamlit_option_menu import option_menu
+import PyPDF4
+from io import BytesIO
+
+
+def loading_dataKnowledge():
+    ########################## RAG: GETTING ANSWERS FROM THE KNOWLEDGE BASE ##############################
+    ################## LOADING A DOCUMENT INTO THE DB AND CONVERTING IT TO VECTORS #######################
+    # loader = PyPDFLoader("C:\\Users\\HP\\Downloads\\SpeechToSpeechBot\\SpeechToSpeechBot\\hackathon\\COI.pdf")
+    # loader = PyPDFLoader("/content/The-Field-Guide-to-Data-Science.pdf")
+    # !gdown "https://drive.google.com/uc?id=15hUEJQViQDxu_fnJeO_Og1hGqykCmJut&confirm=t"
+    # data = loader.load()
+    # documents = data
+    # text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
+    # docs = text_splitter.split_documents(documents)
+    ######################################################################################################
+
+    ################################## LOADING THE LOCALLY STORED DB #####################################
+    # Read the Hugging Face token from the environment; never commit a real token to the repository.
+    os.environ["HUGGINGFACEHUB_API_TOKEN"] = os.getenv("HUGGINGFACEHUB_API_TOKEN", "<YOUR_HF_API_TOKEN>")
+    embeddings = HuggingFaceEmbeddings()
+    db = FAISS.load_local("/home/ubuntu/jupyter_env/Intel-Hackathon/hackathon/db", embeddings)
+    # Using Zephyr to answer with domain knowledge over the trained data
+    llm = HuggingFaceHub(repo_id="HuggingFaceH4/zephyr-7b-beta", model_kwargs={"temperature": 0.7, "max_length": 1024})
+    chain = load_qa_chain(llm, chain_type="stuff")
+    return db, chain
+
+################### INFERENCING THE LLM MODEL USING intel_extension_for_transformers ###################
+def Zephyr_response(prompt):  # Sample inferencing of HuggingFaceH4/zephyr-7b-beta, to run in an Intel Developer Cloud notebook
+
+    import subprocess
+
+    # Uninstall the existing transformers version
+    subprocess.run(['pip', 'uninstall', '-y', 'transformers'])
+
+    # Install the desired version of transformers
+    subprocess.run(['pip', 'install', 'transformers==4.34.1'])
+    subprocess.run(['pip', 'install', 'intel-extension-for-transformers'])
+    from transformers import AutoTokenizer, TextStreamer
+    from intel_extension_for_transformers.transformers import AutoModelForCausalLM
+
+    model_name = "HuggingFaceH4/zephyr-7b-beta"  # Hugging Face model_id or local model
+    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+    inputs = tokenizer(prompt, return_tensors="pt").input_ids
+    streamer = TextStreamer(tokenizer)
+    model = AutoModelForCausalLM.from_pretrained(model_name, load_in_8bit=False)
+    outputs = model.generate(inputs, streamer=streamer, max_new_tokens=300)
+    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
+    print(generated_text)
+    return generated_text
+
+def process_data_sample2(example):  # Prompt tuning to get the required response for a legal document
+
+    processed_example = "<|system|>\n You are a legal document generator and are going to prepare a document for the user from the given prompt. 
you always give best document possible in correct format.ensure that the document is always complete.it should not be incomplete.\n<|user|>\n" + example["instruction"] + "\n<|assistant|>\n" + + return processed_example + +def doucument_creation(sentence): # DOCUMENT CREATING USING FINE TUNED LLM + tokenizer = AutoTokenizer.from_pretrained("/content/drive/MyDrive/intel hackathon/simple1") #LOADING OUR MODEL FROM LOCAL DRIVE + inp_str = process_data_sample2( + { + "instruction": sentence, + } + ) + + inputs = tokenizer(inp_str, return_tensors="pt").to("cuda") #SINCE OUR LLM MODEL IS GPTQ WE REQUIRE CUDA OR SOME GPU + + model = AutoPeftModelForCausalLM.from_pretrained( + "/content/drive/MyDrive/intel hackathon/simple1", + low_cpu_mem_usage=True, + return_dict=True, + torch_dtype=torch.float16, + device_map="cuda") + + generation_config = GenerationConfig( + do_sample=True, + top_k=1, + temperature=0.1, + max_new_tokens=256, + pad_token_id=tokenizer.eos_token_id + ) + return model,generation_config,inputs,tokenizer + + +def process_data_sample(example): # PROMPT TUNING TO GET REQUIRED RESPONSE FOR SUMMARIZATION + processed_example = "<|system|>\n You are document sumarizer who is going to summarise the content without missing any keypoints in a concise manner.Truncate the input if it it beyond length you can handle.always give a complete sentence which makes sense and inform how much word you can handle and take care of grammer and use Capital letter wereever nessary\n<|user|>\n" + example["instruction"] + "\n<|assistant|>\n" + return processed_example + +def document_summarization(sentence): + tokenizer = AutoTokenizer.from_pretrained("sample2") + inp_str = process_data_sample( + { + "instruction": sentence, + } + ) + inputs = tokenizer(inp_str, return_tensors="pt").to("cuda") #SINCE OUR LLM MODEL IS GPTQ WE REQUIRE CUDA OR SOME GPU + + model = AutoPeftModelForCausalLM.from_pretrained( + "sample2", + low_cpu_mem_usage=True, + return_dict=True, + torch_dtype=torch.float16, + device_map="cuda") + + generation_config = GenerationConfig( + do_sample=True, + top_k=1, + temperature=0.1, + max_new_tokens=256, + pad_token_id=tokenizer.eos_token_id + ) + return model,generation_config,inputs,tokenizer + + + + + +#st.title="IntelliLegalHub" +st.set_page_config( + page_title="IntellLegalHub", layout="wide", + page_icon="⚖️", +) + +# Define header + +def sign_up(): + # Your sign-up logic goes here + st.write("Signing up...") + +# Function to simulate login action +def log_in(): + # Your login logic goes here + st.write("Logging in...") + +header_container = st.container() +with header_container: + col1, col2, col3 = st.columns([3, 1, 1]) + + with col1: + # Add logo at the start of your application + st.image("image.png",use_column_width=True) # Adjust width as needed + + with col3: + st.write("") # Placeholder for alignment + col_signup, col_login = st.columns(2) + + with col_signup: + if st.button("Sign Up"): + sign_up() + + with col_login: + if st.button("Log In"): + log_in() + +with st.sidebar: + selected = option_menu( + menu_title="IntellLegalHub", # required + options=["Home", "Document Creator", "Legal Insights Hub", "Document Summarizer & Reader"], # required + icons=["house", "book", "search", "book-fill"], # optional + menu_icon="robot", # optional + default_index=0, # optional + ) + + +if selected == "Home": + + st.markdown("---") + word=''' + Welcome to IntellLegalHub, where innovation meets legal expertise. 
Our platform is dedicated to revolutionizing the legal landscape by harnessing the power of artificial intelligence. Designed to assist lawyers in their work, our AI-powered solution is finely tuned with the latest in Legal Language Models + ''' + st.write(word) + +# Define headers and text + lines = [ + "AI Empowered Document Creation
   Create legally binding documents effortlessly. Craft Non-Disclosure Agreements, Wills, Articles of Incorporation, and more, all powered by cutting-edge AI precision.", + "AI-Driven Legal Insights Hub
    Dive into legal intricacies effortlessly. Explore IPC and CRPC sections, previous case judgments, law amendments, exceptions, and specialized interpretations with our AI-powered legal insights hub.", + "AI-Enabled Document Summarizer & Reader
   Condense complexity, elevate understanding. Summarize extensive documents, including judgments, and convert them into audio, simplifying comprehension with AI-enabled summarization and reader features." + ] + colors = ["#ffffff", "#ffffff", "#ffffff"] # Different shades of gray + + # Display each line with a padding box and background color + for i in range(len(lines)): + st.markdown( + f""" +
{lines[i]}
+ """, + unsafe_allow_html=True + ) + +if selected == "Document Creator": + st.markdown("---") + + ttl_container = st.container() + with ttl_container: + col1, col2 = st.columns([1, 9]) + with col1: + st.image("paper.png",width=70) + with col2: + st.markdown( + + f""" +
+

AI Powered Document Creation

+
+ """, + unsafe_allow_html=True, + ) + + + # Document types + + def display_content(): + lines = [ + "Non - Disclousure Agreement (NDA)             Will and Testament", + "Articles of Incorporation Contract             Power of Attorney" + ] + colors = ["#ffffff", "#ffffff"] # Background color: gray + + for i in range(len(lines)): + st.markdown( + f""" +
+
+ {lines[i]} +
+
+ """, + unsafe_allow_html=True + ) + + button_label = "⮟" + st.write("

To view all features

", unsafe_allow_html=True) + display_content_flag = False + + if st.button(button_label): + display_content_flag = not display_content_flag + if display_content_flag: + display_content() + + if display_content_flag: + st.button("⮝") + else: + st.empty() + + + st.markdown("---") + + # Title for chat + st.write("How can I help you today?") + # Chat input + message = st.text_input("", placeholder="Type your message here...") + # Display user messages + try: + if st.button("Send ➤"): + st.image("user.png", width=50) + st.write("", message) + msg=message+" Give me the legal document based on Indian law for above query, and in 1 page and in correct format, give me the complete document only the assistant response dont give the system and user passage ,The document should be complete and in proper format" + model,config,inputs,tokenizer=doucument_creation(message) + outputs = model.generate(**inputs, generation_config=config) + bot_response=tokenizer.decode(outputs[0], skip_special_tokens=True) + # Compute the bot response + # bot_response =Zephyr_response(msg) + parts = bot_response.split("<|assistant|>", 1) + result="" + if len(parts) > 1: + # Extract the text before the substring + result = parts[1] + print(result) + else: + print("something went wrong") + bot_response=result + + #Display the bot response in the main window + st.image("bot.png", width=50) + st.write("", bot_response) + except Exception as e: + bot_response =Zephyr_response(msg) + st.image("bot.png", width=50) + st.write("", bot_response) + finally: + # Chat display + st.markdown("---") + st.subheader("Chat History") + st.empty() + +def response(msg,db): + docs = db.similarity_search(msg) + + return docs +if selected == "Legal Insights Hub": + + #splitdataset()if selected == "Legal Insights Hub": + st.markdown("---") + ttl1_container = st.container() + with ttl1_container: + col1, col2 = st.columns([1, 9]) + with col1: + st.image("loupe.png",width=70) + with col2: + st.markdown( + f""" +
+

AI - Driven Legal Insights Hub

+
+ """, + unsafe_allow_html=True, + ) + +# Content for AI - Driven Legal Insights Hub section + def display_content(): + lines = [ + "Queries IPC and CRPC Sections             Queries previous case Judgements", + "Checks for amendments in laws             Checks for exceptions from clause", + "Checks for special interpretation for the law" + ] + colors = ["#ffffff", "#ffffff", "#ffffff", "#ffffff"] # Background color: gray + + for i in range(len(lines)): + st.markdown( + f""" +
+
{lines[i]}
+
+ """, + unsafe_allow_html=True + ) + + + db,chain=loading_dataKnowledge() + button_label = "⮟" + st.write("

To view all features

", unsafe_allow_html=True) + display_content_flag = False + + if st.button(button_label): + display_content_flag = not display_content_flag + if display_content_flag: + display_content() + + if display_content_flag: + st.button("⮝") + else: + st.empty() + + + st.markdown("---") + + st.write("What infromation do you want to know?") + + # Chat input + message = st.text_input("", placeholder="Type your message here...") + # Display user messages + try: + if st.button("Send ➤"): + st.image("user.png", width=50) + st.write("", message) + msg=message+" ? Give me the response based on content given if available , stop at full stop. if not use your knowledge. Ensure the answer is precise and complete. Be on the point while answering and try to be legally sound.Your Response should be always complete." + + + # Compute the bot response + bot_response =chain.run(input_documents=response(msg,db) , question= msg) + + + # Display the bot response in the main window + st.image("bot.png", width=50) + st.write("", bot_response) + except Exception as e: + st.image("bot.png", width=50) + st.write("SomeThing went wrong") + finally: + st.markdown("---") + st.subheader("Chat History") + st.empty() + +if selected == "Document Summarizer & Reader": + st.markdown("---") + ttl2_container = st.container() + with ttl2_container: + col1, col2 = st.columns([1, 9]) + with col1: + st.image("reading.png",width=70) + with col2: + st.markdown( + f""" +
+

AI enabled Document Summarizer & Reader

+
+ """, + unsafe_allow_html=True, + ) + + # Creating narrower boxes in a single row with three columns + def display_content(): + lines = [ + "Summarizes Judgements             Summarizes long documents", + "Converts the summarized document to audio" + ] + colors = ["#ffffff", "#ffffff"] # Background color: gray + + for i in range(len(lines)): + st.markdown( + f""" +
{lines[i]}
+ """, + unsafe_allow_html=True + ) + + button_label = "⮟" + st.write("

To view all features

", unsafe_allow_html=True) + display_content_flag = False + + if st.button(button_label): + display_content_flag = not display_content_flag + if display_content_flag: + display_content() + + if display_content_flag: + st.button("⮝") + else: + st.empty() + + st.markdown("---") + + st.write("How can I help you today?") + message_from_pdf="" + def read_pdf(file): + pdf_reader=PyPDF4.PdfFileReader(file) + text="" + for page in range(pdf_reader.getNumPages()): + page_obj=pdf_reader.getPage(page) + text +=page_obj.extractText() + return text + + uploaded_file = st.file_uploader("Upload a PDF", type="pdf") + print(uploaded_file) + if uploaded_file is not None: + # Process the uploaded file (for example, display its content) + file_contents = uploaded_file.read() + text=read_pdf(BytesIO(file_contents)) + message_from_pdf=text + + # print(text+"HHHHHHHHHHHHHHHHHHHHHHHH") + + message = st.text_input("", placeholder="Type your message here...") + message=message_from_pdf + # Display user messages + try: + if st.button("Send ➤"): + st.image("user.png", width=50) + st.write("", message) + msg=message+"summarize the above content without loosing any key points" + model,con,inp,tokenizer=document_summarization(msg) + outputs = model.generate(**inp, generation_config=con) + bot_response=(tokenizer.decode(outputs[0], skip_special_tokens=True)) + print(bot_response+"************************************************") + parts = bot_response.split("<|assistant|>", 1) + result="" + if len(parts) > 1: + # Extract the text before the substring + result = parts[2] + print(result) + else: + print("Substring not found in the string.") + + # Display the bot response in the main window + + st.image("bot.png", width=50) + st.write("", bot_response) + except Exception as e: + bot_response =Zephyr_response(msg) + st.image("bot.png", width=50) + + st.write("", bot_response) + finally: + st.markdown("---") + st.subheader("Chat History") + st.empty() + + + + + +#i button +def display_info_buttons(): + sb1_container = st.sidebar.container() + with sb1_container: + col1, col2 = st.columns([1, 7]) + with col1: + st.image("about.png", width=30) + with col2: + st.button("About", key="about_button") + + sb2_container = st.sidebar.container() + with sb2_container: + col1, col2 = st.columns([1, 7]) + with col1: + st.image("help.png", width=30) + with col2: + st.button("Help", key="help_button") + + sb3_container = st.sidebar.container() + with sb3_container: + col1, col2 = st.columns([1, 7]) + with col1: + st.image("appdocs.png", width=30) + with col2: + st.button("Document", key="document_button") + +# Sidebar info button +info_clicked = st.sidebar.button("ℹ️") +if info_clicked: + if "displayed" not in st.session_state: + st.session_state.displayed = False # Initialize the state if not set + + if st.session_state.displayed: + st.session_state.displayed = False # Hide content if already displayed + else: + display_info_buttons() # Show buttons if not displayed + st.session_state.displayed = True + +logo=st.sidebar.container() +with logo: + st.image("logo.png") + +#footer in sidebar +footer_container = st.sidebar.container() +with footer_container: + st.markdown( + f""" +
+

Follow Us on

+ +
+ """, + unsafe_allow_html=True, ) + col1, col2, col3,col4 = st.columns([1, 1, 1,1]) + with col1: + st.image("facebook.png") + with col2: + st.image("whatsapp.png") + with col3: + st.image("instagram.png") + with col4: + st.image("linkedin.png") + st.markdown( + f""" +
+

Contact Us:  IntelliLegalHub

+

©️ 2023 IntelliLegalHub. All rights reserved.

+ +
+ """, + unsafe_allow_html=True, ) diff --git a/architects_of_future/hackathon/facebook.png b/architects_of_future/hackathon/facebook.png new file mode 100644 index 00000000..1a44cdc0 Binary files /dev/null and b/architects_of_future/hackathon/facebook.png differ diff --git a/architects_of_future/hackathon/gmail.png b/architects_of_future/hackathon/gmail.png new file mode 100644 index 00000000..8385916a Binary files /dev/null and b/architects_of_future/hackathon/gmail.png differ diff --git a/architects_of_future/hackathon/help.png b/architects_of_future/hackathon/help.png new file mode 100644 index 00000000..83c71aa6 Binary files /dev/null and b/architects_of_future/hackathon/help.png differ diff --git a/architects_of_future/hackathon/image.png b/architects_of_future/hackathon/image.png new file mode 100644 index 00000000..87f2ad35 Binary files /dev/null and b/architects_of_future/hackathon/image.png differ diff --git a/architects_of_future/hackathon/instagram.png b/architects_of_future/hackathon/instagram.png new file mode 100644 index 00000000..13337929 Binary files /dev/null and b/architects_of_future/hackathon/instagram.png differ diff --git a/architects_of_future/hackathon/linkedin.png b/architects_of_future/hackathon/linkedin.png new file mode 100644 index 00000000..7adc9c8e Binary files /dev/null and b/architects_of_future/hackathon/linkedin.png differ diff --git a/architects_of_future/hackathon/logo.png b/architects_of_future/hackathon/logo.png new file mode 100644 index 00000000..97ee526a Binary files /dev/null and b/architects_of_future/hackathon/logo.png differ diff --git a/architects_of_future/hackathon/loupe.png b/architects_of_future/hackathon/loupe.png new file mode 100644 index 00000000..f2c1ee71 Binary files /dev/null and b/architects_of_future/hackathon/loupe.png differ diff --git a/architects_of_future/hackathon/paper.png b/architects_of_future/hackathon/paper.png new file mode 100644 index 00000000..5cae6201 Binary files /dev/null and b/architects_of_future/hackathon/paper.png differ diff --git a/architects_of_future/hackathon/reading.png b/architects_of_future/hackathon/reading.png new file mode 100644 index 00000000..704ecd15 Binary files /dev/null and b/architects_of_future/hackathon/reading.png differ diff --git a/architects_of_future/hackathon/user.png b/architects_of_future/hackathon/user.png new file mode 100644 index 00000000..4e2313a3 Binary files /dev/null and b/architects_of_future/hackathon/user.png differ diff --git a/architects_of_future/hackathon/whatsapp.png b/architects_of_future/hackathon/whatsapp.png new file mode 100644 index 00000000..e607a582 Binary files /dev/null and b/architects_of_future/hackathon/whatsapp.png differ diff --git a/architects_of_future/requirements.txt b/architects_of_future/requirements.txt new file mode 100644 index 00000000..2bdefb65 --- /dev/null +++ b/architects_of_future/requirements.txt @@ -0,0 +1,207 @@ +accelerate==0.25.0 +aiohttp==3.9.1 +aiosignal==1.3.1 +altair==5.2.0 +annotated-types==0.6.0 +anyio==4.1.0 +argon2-cffi==23.1.0 +argon2-cffi-bindings==21.2.0 +arrow==1.3.0 +asttokens==2.4.1 +async-lru==2.0.4 +async-timeout==4.0.3 +attrs==23.1.0 +auto-gptq==0.6.0 +Babel==2.14.0 +beautifulsoup4==4.12.2 +bleach==6.1.0 +blinker==1.7.0 +cachetools==5.3.2 +certifi==2023.11.17 +cffi==1.16.0 +charset-normalizer==3.3.2 +click==8.1.7 +coloredlogs==15.0.1 +comm==0.2.0 +contextlib2==21.6.0 +contourpy==1.2.0 +cycler==0.12.1 +dataclasses-json==0.6.3 +datasets==2.15.0 +debugpy==1.8.0 +decorator==5.1.1 +defusedxml==0.7.1 
+Deprecated==1.2.14 +diffusers==0.24.0 +dill==0.3.7 +exceptiongroup==1.2.0 +executing==2.0.1 +faiss-cpu==1.7.4 +fastjsonschema==2.19.0 +filelock==3.13.1 +fonttools==4.46.0 +fqdn==1.5.1 +frozenlist==1.4.0 +fsspec==2023.10.0 +gekko==1.0.6 +gitdb==4.0.11 +GitPython==3.1.40 +greenlet==3.0.2 +gTTS==2.4.0 +huggingface-hub==0.17.3 +humanfriendly==10.0 +idna==3.6 +importlib-metadata==6.11.0 +install==1.3.5 +intel-extension-for-transformers==1.2.2 +ipykernel==6.27.1 +ipython==8.18.1 +isoduration==20.11.0 +jedi==0.19.1 +Jinja2==3.1.2 +joblib==1.3.2 +json5==0.9.14 +jsonpatch==1.33 +jsonpointer==2.4 +jsonschema==4.20.0 +jsonschema-specifications==2023.11.2 +jupyter-events==0.9.0 +jupyter-lsp==2.2.1 +jupyter_client==8.6.0 +jupyter_core==5.5.0 +jupyter_server==2.12.1 +jupyter_server_terminals==0.5.0 +jupyterlab==4.0.9 +jupyterlab_pygments==0.3.0 +jupyterlab_server==2.25.2 +kiwisolver==1.4.5 +langchain==0.0.350 +langchain-community==0.0.3 +langchain-core==0.1.1 +langsmith==0.0.70 +markdown-it-py==3.0.0 +MarkupSafe==2.1.3 +marshmallow==3.20.1 +matplotlib==3.8.2 +matplotlib-inline==0.1.6 +mdurl==0.1.2 +mistune==3.0.2 +mpmath==1.3.0 +multidict==6.0.4 +multiprocess==0.70.15 +mypy-extensions==1.0.0 +nbclient==0.9.0 +nbconvert==7.12.0 +nbformat==5.9.2 +nest-asyncio==1.5.8 +networkx==3.2.1 +neural-compressor==2.4 +nltk==3.8.1 +notebook_shim==0.2.3 +npx==0.1.1 +numpy==1.26.2 +nvidia-cublas-cu12==12.1.3.1 +nvidia-cuda-cupti-cu12==12.1.105 +nvidia-cuda-nvrtc-cu12==12.1.105 +nvidia-cuda-runtime-cu12==12.1.105 +nvidia-cudnn-cu12==8.9.2.26 +nvidia-cufft-cu12==11.0.2.54 +nvidia-curand-cu12==10.3.2.106 +nvidia-cusolver-cu12==11.4.5.107 +nvidia-cusparse-cu12==12.1.0.106 +nvidia-nccl-cu12==2.18.1 +nvidia-nvjitlink-cu12==12.3.101 +nvidia-nvtx-cu12==12.1.105 +opencv-python-headless==4.8.1.78 +optimum==1.16.0 +overrides==7.4.0 +packaging==23.2 +pandas==2.1.4 +pandocfilters==1.5.0 +parso==0.8.3 +peft==0.7.1 +pexpect==4.9.0 +Pillow==10.1.0 +pipeline==0.1.0 +platformdirs==4.1.0 +prettytable==3.9.0 +prometheus-client==0.19.0 +prompt-toolkit==3.0.43 +protobuf==4.25.1 +psutil==5.9.6 +ptyprocess==0.7.0 +pure-eval==0.2.2 +py-cpuinfo==9.0.0 +pyarrow==14.0.1 +pyarrow-hotfix==0.6 +pycocotools==2.0.7 +pycparser==2.21 +pydantic==2.5.2 +pydantic_core==2.14.5 +pydeck==0.8.1b0 +Pygments==2.17.2 +pyparsing==3.1.1 +pypdf==3.17.2 +PyPDF4==1.27.0 +python-dateutil==2.8.2 +python-json-logger==2.0.7 +pyttsx3==2.90 +pytz==2023.3.post1 +PyYAML==6.0.1 +pyzmq==25.1.2 +referencing==0.32.0 +regex==2023.10.3 +requests==2.31.0 +rfc3339-validator==0.1.4 +rfc3986-validator==0.1.1 +rich==13.7.0 +rouge==1.0.1 +rpds-py==0.13.2 +safetensors==0.4.1 +schema==0.7.5 +scikit-learn==1.3.2 +scipy==1.11.4 +Send2Trash==1.8.2 +sentence-transformers==2.2.2 +sentencepiece==0.1.99 +six==1.16.0 +smmap==5.0.1 +sniffio==1.3.0 +soupsieve==2.5 +SQLAlchemy==2.0.23 +stack-data==0.6.3 +streamlit==1.29.0 +streamlit-option-menu==0.3.6 +sympy==1.12 +tenacity==8.2.3 +terminado==0.18.0 +threadpoolctl==3.2.0 +tinycss2==1.2.1 +tokenizers==0.14.1 +toml==0.10.2 +tomli==2.0.1 +toolz==0.12.0 +torch==2.1.2 +torchvision==0.16.2 +tornado==6.4 +tqdm==4.66.1 +traitlets==5.14.0 +transformers==4.34.1 +triton==2.1.0 +types-python-dateutil==2.8.19.14 +typing-inspect==0.9.0 +typing_extensions==4.9.0 +tzdata==2023.3 +tzlocal==5.2 +uri-template==1.3.0 +urllib3==2.1.0 +validators==0.22.0 +watchdog==3.0.0 +wcwidth==0.2.12 +webcolors==1.13 +webencodings==0.5.1 +websocket-client==1.7.0 +wrapt==1.16.0 +xxhash==3.4.1 +yarl==1.9.4 +zipp==3.17.0