{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# QuickStart" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "nbsphinx": "hidden" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "The autoreload extension is already loaded. To reload it, use:\n", " %reload_ext autoreload\n" ] } ], "source": [ "%load_ext autoreload\n", "%autoreload 2" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "nbsphinx": "hidden", "slideshow": { "slide_type": "skip" } }, "outputs": [], "source": [ "import IPython.display as ipd\n", "from IPython.core.interactiveshell import InteractiveShell\n", "InteractiveShell.ast_node_interactivity = \"all\"" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Build Image Search In 5 Minutes" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "collection_name = 'pokemon_images'\n", "\n", "documents = []\n", "for i in range(1, 20):\n", " documents.append({\n", " 'image': 'https://assets.pokemon.com/assets/cms2/img/pokedex/full/{}.png'.format(f'{i:03}'),\n", " 'pokemon_id' : str(i),\n", " '_id': i\n", " })" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Logged in. Welcome public-demo. To view list of available collections, call list_collections() method.\n" ] }, { "data": { "text/plain": [ "{'status': 'complete', 'message': 'pokemon_images deleted'}" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#1. specify the vdb client\n", "from vectorai.client import ViClient\n", "vi_client = ViClient(username, api_key, url)\n", "vi_client.delete_collection(collection_name)\n", "\n", "#2. specify an image encoder\n", "from vectorai.models.deployed import ViImage2Vec\n", "image_encoder = ViImage2Vec(username, api_key, url)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "82cb6f5ed0d5415eac27727b6ee1bba8", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(IntProgress(value=0, max=1), HTML(value='')))" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n" ] }, { "data": { "text/plain": [ "{'inserted_successfully': 19, 'failed': 0, 'failed_document_ids': []}" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#3. insert the documents and encode images simultaneously\n", "# using jobs means that the encoding process takes place on our servers as opposed to your computer\n", "use_jobs = False\n", "\n", "if use_jobs:\n", " vi_client.insert_documents(collection_name, documents)\n", " job = vi_client.encode_image_job(collection_name, 'image')\n", " vi_client.wait_till_jobs_complete(collection_name, job['job_id'], job['job_name'])\n", "else:\n", " vi_client.insert_documents(collection_name, documents, models={'image':image_encoder.encode})" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
_idimagepokemon_idinsert_date__search_score
0332020-10-02T07:07:18.7995591.000000
1222020-10-02T07:07:18.7976930.920337
2112020-10-02T07:07:18.7956550.838996
317172020-10-02T07:07:30.5878410.835111
416162020-10-02T07:07:30.5853990.813012
" ], "text/plain": [ "" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#4. search\n", "search_results = vi_client.search(collection_name,\n", " image_encoder.encode('https://assets.pokemon.com/assets/cms2/img/pokedex/full/003.png'), \n", " 'image_vector_', page_size=5)\n", "\n", "#4.2 first result is the query audio itself\n", "vi_client.show_json(search_results, image_fields=['image'], image_width=150)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
_idimagepokemon_idinsert_date__search_score
0222020-10-02T07:07:18.7976931.000000
1332020-10-02T07:07:18.7995590.920337
2112020-10-02T07:07:18.7956550.895991
3772020-10-02T07:07:18.8071990.839945
417172020-10-02T07:07:30.5878410.833277
" ], "text/plain": [ "" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#5 recommendation by id\n", "search_by_id_results = vi_client.search_by_id(collection_name, '2', 'image_vector_', page_size=5)\n", "\n", "#5.2 first result is the id's audio itself\n", "vi_client.show_json(search_by_id_results, image_fields=['image'], image_width=150)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Build Audio Search in 5 Minutes" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Building Audio search is easy with Vi!" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "collection_name = 'audio_quickstart'\n", "\n", "#create the documents\n", "documents = []\n", "for i in range(1, 1001):\n", " documents.append({\n", " 'audio': 'https://vecsearch-bucket.s3.us-east-2.amazonaws.com/voices/common_voice_en_{}.wav'.format(i),\n", " 'name' : 'common_voice_en_{}.wav'.format(i),\n", " '_id': i\n", " })" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Logged in. Welcome public-demo. To view list of available collections, call list_collections() method.\n" ] }, { "data": { "text/plain": [ "{'status': 'complete', 'message': 'audio_quickstart deleted'}" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#1. specify the vdb client\n", "from vectorai.client import ViClient\n", "vi_client = ViClient(username, api_key, url)\n", "vi_client.delete_collection(collection_name)\n", "\n", "#2. specify an audio encoder\n", "from vectorai.models.deployed import ViAudio2Vec\n", "audio_encoder = ViAudio2Vec(username, api_key, url)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "d:\\kda\\vectorai\\vectorai\\read.py:351: UserWarning: Potential issue. Cannot find a vector field. Check that the vector field is _vector_.\n", " \"Potential issue. Cannot find a vector field. Check that the vector field is _vector_.\"\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "f1b5d6b126b4459facb92dadabb1764f", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(IntProgress(value=0, max=66), HTML(value='')))" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n" ] }, { "data": { "text/plain": [ "{'inserted_successfully': 1000, 'failed': 0, 'failed_document_ids': []}" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" }, { "name": "stdout", "output_type": "stream", "text": [ "{'status': 'Finished'}\n" ] }, { "data": { "text/plain": [ "'Done'" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#3. insert the documents and encode audio simultaneously\n", "use_jobs = True\n", "\n", "if use_jobs:\n", " vi_client.insert_documents(collection_name, documents)\n", " job = vi_client.encode_audio_job(collection_name, 'audio')\n", " vi_client.wait_till_jobs_complete(collection_name, job['job_id'], job['job_name'])\n", "else:\n", " vi_client.insert_documents(collection_name, documents, models={'audio':audio_encoder.encode})" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
_idnameinsert_date_audio_search_score
01common_voice_en_1.wav2020-10-02T07:07:34.7253781.000000
112common_voice_en_12.wav2020-10-02T07:07:34.7261000.893219
232common_voice_en_32.wav2020-10-02T07:07:35.2716380.891373
320common_voice_en_20.wav2020-10-02T07:07:35.0461280.882336
415common_voice_en_15.wav2020-10-02T07:07:34.7262510.877323
" ], "text/plain": [ "" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import IPython.display as ipd\n", "#4. search\n", "search_results = vi_client.search(collection_name, audio_encoder.encode(documents[0]['audio']), \n", " 'audio_vector_', page_size=5)\n", "\n", "vi_client.show_json(search_results, audio_fields=['audio'])" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
_idnameinsert_date_audio_search_score
02common_voice_en_2.wav2020-10-02T07:07:34.7255361.000000
140common_voice_en_40.wav2020-10-02T07:07:35.2726030.884632
23common_voice_en_3.wav2020-10-02T07:07:34.7256290.879187
314common_voice_en_14.wav2020-10-02T07:07:34.7262000.874556
421common_voice_en_21.wav2020-10-02T07:07:35.0462240.865409
" ], "text/plain": [ "" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#5 recommendation by id\n", "search_by_id_results = vi_client.search_by_id(collection_name, '2', 'audio_vector_', page_size=5)\n", "\n", "vi_client.show_json(search_by_id_results, audio_fields=['audio'])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Build Text QA Search in 5 minutes" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Requirement already satisfied: datasets in c:\\users\\jacky\\anaconda3\\lib\\site-packages (1.0.1)\n", "Requirement already satisfied: filelock in c:\\users\\jacky\\anaconda3\\lib\\site-packages (from datasets) (3.0.12)\n", "Requirement already satisfied: dill in c:\\users\\jacky\\anaconda3\\lib\\site-packages (from datasets) (0.3.1.1)\n", "Requirement already satisfied: pyarrow>=0.17.1 in c:\\users\\jacky\\anaconda3\\lib\\site-packages (from datasets) (1.0.0)\n", "Requirement already satisfied: requests>=2.19.0 in c:\\users\\jacky\\anaconda3\\lib\\site-packages (from datasets) (2.22.0)\n", "Requirement already satisfied: numpy>=1.17 in c:\\users\\jacky\\anaconda3\\lib\\site-packages (from datasets) (1.19.1)\n", "Requirement already satisfied: pandas in c:\\users\\jacky\\anaconda3\\lib\\site-packages (from datasets) (0.25.2)\n", "Requirement already satisfied: tqdm>=4.27 in c:\\users\\jacky\\anaconda3\\lib\\site-packages (from datasets) (4.36.1)\n", "Requirement already satisfied: xxhash in c:\\users\\jacky\\anaconda3\\lib\\site-packages (from datasets) (2.0.0)\n", "Requirement already satisfied: certifi>=2017.4.17 in c:\\users\\jacky\\anaconda3\\lib\\site-packages (from requests>=2.19.0->datasets) (2020.6.20)\n", "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in c:\\users\\jacky\\anaconda3\\lib\\site-packages (from requests>=2.19.0->datasets) (1.24.2)\n", "Requirement already satisfied: idna<2.9,>=2.5 in c:\\users\\jacky\\anaconda3\\lib\\site-packages (from requests>=2.19.0->datasets) (2.8)\n", "Requirement already satisfied: chardet<3.1.0,>=3.0.2 in c:\\users\\jacky\\anaconda3\\lib\\site-packages (from requests>=2.19.0->datasets) (3.0.4)\n", "Requirement already satisfied: pytz>=2017.2 in c:\\users\\jacky\\anaconda3\\lib\\site-packages (from pandas->datasets) (2019.3)\n", "Requirement already satisfied: python-dateutil>=2.6.1 in c:\\users\\jacky\\anaconda3\\lib\\site-packages (from pandas->datasets) (2.8.0)\n", "Requirement already satisfied: six>=1.5 in c:\\users\\jacky\\anaconda3\\lib\\site-packages (from python-dateutil>=2.6.1->pandas->datasets) (1.12.0)\n", "Note: you may need to restart the kernel to use updated packages.\n" ] } ], "source": [ "%pip install datasets" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "collection_name = 'squad'\n", "\n", "#use huggingface's datasets library to download squad\n", "import datasets\n", "squad_dataset = datasets.load_dataset('squad')\n", "documents = [{'_id':str(n), **d} for n, d in enumerate(squad_dataset['validation'])]\n", "vi_client.delete_collection(collection_name)" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "#1. specify the vdb client\n", "from vectorai.client import ViClient\n", "vi_client = ViClient(username, api_key, url)\n", "vi_client.delete_collection(collection_name)\n", "\n", "#2. specify a text encoder\n", "from vectorai.models.deployed import ViText2Vec\n", "text_encoder = ViText2Vec(username, api_key, 'https://api.vctr.ai')" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "d:\\kda\\vectorai\\vectorai\\read.py:351: UserWarning: Potential issue. Cannot find a vector field. Check that the vector field is _vector_.\n", " \"Potential issue. Cannot find a vector field. Check that the vector field is _vector_.\"\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "cbe3f07f6cdb4be78030b4b89440a277", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(IntProgress(value=0, max=704), HTML(value='')))" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n" ] }, { "data": { "text/plain": [ "{'inserted_successfully': 10570, 'failed': 0, 'failed_document_ids': []}" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" }, { "name": "stdout", "output_type": "stream", "text": [ "{'status': 'Finished'}\n" ] }, { "data": { "text/plain": [ "'Done'" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#3. insert the documents and encode text simultaneously\n", "use_jobs = True\n", "\n", "if use_jobs:\n", " vi_client.insert_documents(collection_name, documents)\n", " job = vi_client.encode_text_job(collection_name, 'question')\n", " vi_client.wait_till_jobs_complete(collection_name, job['job_id'], job['job_name'])\n", "else:\n", " vi_client.insert_documents(collection_name, documents, models={'question':text_encoder}, use_bulk_encode=True)" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
_idquestionanswerscontextinsert_date_idtitle_search_score
011Who won Super Bowl 50?{'answer_start': [177, 177, 177], 'text': ['De...Super Bowl 50 was an American football game to...2020-10-02T07:47:06.94731356beace93aeaaa14008c91dfSuper_Bowl_500.798744
124Who won Super Bowl 50?{'answer_start': [177, 177, 177], 'text': ['De...Super Bowl 50 was an American football game to...2020-10-02T07:47:07.28518256d20362e7d4791d009025ebSuper_Bowl_500.798744
23Which NFL team won Super Bowl 50?{'answer_start': [177, 177, 177], 'text': ['De...Super Bowl 50 was an American football game to...2020-10-02T07:47:06.94669456be4db0acb8001400a502efSuper_Bowl_500.763209
355Who was the Super Bowl 50 MVP?{'answer_start': [248, 248, 252], 'text': ['Vo...The Broncos took an early lead in Super Bowl 5...2020-10-02T07:47:07.75915456be4eafacb8001400a50302Super_Bowl_500.754090
426Which team won Super Bowl 50.{'answer_start': [177, 177, 177], 'text': ['De...Super Bowl 50 was an American football game to...2020-10-02T07:47:07.28540356d600e31c85041400946eb0Super_Bowl_500.742759
\n", "
" ], "text/plain": [ " _id question \\\n", "0 11 Who won Super Bowl 50? \n", "1 24 Who won Super Bowl 50? \n", "2 3 Which NFL team won Super Bowl 50? \n", "3 55 Who was the Super Bowl 50 MVP? \n", "4 26 Which team won Super Bowl 50. \n", "\n", " answers \\\n", "0 {'answer_start': [177, 177, 177], 'text': ['De... \n", "1 {'answer_start': [177, 177, 177], 'text': ['De... \n", "2 {'answer_start': [177, 177, 177], 'text': ['De... \n", "3 {'answer_start': [248, 248, 252], 'text': ['Vo... \n", "4 {'answer_start': [177, 177, 177], 'text': ['De... \n", "\n", " context \\\n", "0 Super Bowl 50 was an American football game to... \n", "1 Super Bowl 50 was an American football game to... \n", "2 Super Bowl 50 was an American football game to... \n", "3 The Broncos took an early lead in Super Bowl 5... \n", "4 Super Bowl 50 was an American football game to... \n", "\n", " insert_date_ id title \\\n", "0 2020-10-02T07:47:06.947313 56beace93aeaaa14008c91df Super_Bowl_50 \n", "1 2020-10-02T07:47:07.285182 56d20362e7d4791d009025eb Super_Bowl_50 \n", "2 2020-10-02T07:47:06.946694 56be4db0acb8001400a502ef Super_Bowl_50 \n", "3 2020-10-02T07:47:07.759154 56be4eafacb8001400a50302 Super_Bowl_50 \n", "4 2020-10-02T07:47:07.285403 56d600e31c85041400946eb0 Super_Bowl_50 \n", "\n", " _search_score \n", "0 0.798744 \n", "1 0.798744 \n", "2 0.763209 \n", "3 0.754090 \n", "4 0.742759 " ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#4. search\n", "search_results = vi_client.search(collection_name, \n", " text_encoder.encode('who was the winner for nfl fifty'), \n", " 'question_vector_', page_size=5)\n", "\n", "#4.2 first result is the query text itself\n", "vi_client.results_to_df(search_results)" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
_idquestionanswerscontextinsert_date_idtitle_search_score
050Who did Denver beat in the 2015 AFC Championsh...{'answer_start': [372, 368, 372], 'text': ['Ne...The Panthers finished the regular season with ...2020-10-02T07:47:07.75876756d6017d1c85041400946ec1Super_Bowl_501.000000
148Who did Denver beat in the AFC championship?{'answer_start': [372, 368, 372], 'text': ['Ne...The Panthers finished the regular season with ...2020-10-02T07:47:07.75854156d2045de7d4791d009025f6Super_Bowl_500.960072
2331Who did the Broncos beat to win their division...{'answer_start': [25, 25, 36], 'text': ['Pitts...The Broncos defeated the Pittsburgh Steelers i...2020-10-02T07:47:12.20903856d99f99dc89441400fdb628Super_Bowl_500.923735
3330Who did the Broncos defeat in the AFC Champion...{'answer_start': [192, 192, 204], 'text': ['Ne...The Broncos defeated the Pittsburgh Steelers i...2020-10-02T07:47:12.20887656d7018a0d65d214001982c5Super_Bowl_500.915792
4328Who did the Broncos beat in the divisional game?{'answer_start': [25, 21, 36], 'text': ['Pitts...The Broncos defeated the Pittsburgh Steelers i...2020-10-02T07:47:11.95608956d7018a0d65d214001982c2Super_Bowl_500.906187
\n", "
" ], "text/plain": [ " _id question \\\n", "0 50 Who did Denver beat in the 2015 AFC Championsh... \n", "1 48 Who did Denver beat in the AFC championship? \n", "2 331 Who did the Broncos beat to win their division... \n", "3 330 Who did the Broncos defeat in the AFC Champion... \n", "4 328 Who did the Broncos beat in the divisional game? \n", "\n", " answers \\\n", "0 {'answer_start': [372, 368, 372], 'text': ['Ne... \n", "1 {'answer_start': [372, 368, 372], 'text': ['Ne... \n", "2 {'answer_start': [25, 25, 36], 'text': ['Pitts... \n", "3 {'answer_start': [192, 192, 204], 'text': ['Ne... \n", "4 {'answer_start': [25, 21, 36], 'text': ['Pitts... \n", "\n", " context \\\n", "0 The Panthers finished the regular season with ... \n", "1 The Panthers finished the regular season with ... \n", "2 The Broncos defeated the Pittsburgh Steelers i... \n", "3 The Broncos defeated the Pittsburgh Steelers i... \n", "4 The Broncos defeated the Pittsburgh Steelers i... \n", "\n", " insert_date_ id title \\\n", "0 2020-10-02T07:47:07.758767 56d6017d1c85041400946ec1 Super_Bowl_50 \n", "1 2020-10-02T07:47:07.758541 56d2045de7d4791d009025f6 Super_Bowl_50 \n", "2 2020-10-02T07:47:12.209038 56d99f99dc89441400fdb628 Super_Bowl_50 \n", "3 2020-10-02T07:47:12.208876 56d7018a0d65d214001982c5 Super_Bowl_50 \n", "4 2020-10-02T07:47:11.956089 56d7018a0d65d214001982c2 Super_Bowl_50 \n", "\n", " _search_score \n", "0 1.000000 \n", "1 0.960072 \n", "2 0.923735 \n", "3 0.915792 \n", "4 0.906187 " ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#5 recommendation by id\n", "search_by_id_results = vi_client.search_by_id(collection_name, documents[50]['_id'], 'question_vector_', page_size=5)\n", "\n", "#5.2 first result is the id's text itself\n", "vi_client.results_to_df(search_by_id_results)" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
_idquestionanswerscontextinsert_date_idtitle_search_score
0258How old was Peyton Manning in 2015?{'answer_start': [817, 817, 817], 'text': ['39...Following their loss in the divisional round o...2020-10-02T07:47:11.00083056bf301c3aeaaa14008c9550Super_Bowl_500.641220
1276How may yards did Peyton Manning throw?{'answer_start': [77, 77, 77], 'text': ['2,249...Manning finished the year with a career-low 67...2020-10-02T07:47:11.23919556bf38383aeaaa14008c956cSuper_Bowl_500.634783
2270What was Peyton Manning's passer rating for th...{'answer_start': [44, 44, 44], 'text': ['67.9'...Manning finished the year with a career-low 67...2020-10-02T07:47:11.23864656beb57b3aeaaa14008c9279Super_Bowl_500.617874
3252Who did Peyton Manning play for as a rookie?{'answer_start': [641, 637, 654], 'text': ['In...Following their loss in the divisional round o...2020-10-02T07:47:10.76042356beb4e43aeaaa14008c9267Super_Bowl_500.612926
4356Peyton Manning took how many different teams t...{'answer_start': [57, 57, 57, 57], 'text': ['t...Peyton Manning became the first quarterback ev...2020-10-02T07:47:12.42891556d704430d65d214001982deSuper_Bowl_500.611716
\n", "
" ], "text/plain": [ " _id question \\\n", "0 258 How old was Peyton Manning in 2015? \n", "1 276 How may yards did Peyton Manning throw? \n", "2 270 What was Peyton Manning's passer rating for th... \n", "3 252 Who did Peyton Manning play for as a rookie? \n", "4 356 Peyton Manning took how many different teams t... \n", "\n", " answers \\\n", "0 {'answer_start': [817, 817, 817], 'text': ['39... \n", "1 {'answer_start': [77, 77, 77], 'text': ['2,249... \n", "2 {'answer_start': [44, 44, 44], 'text': ['67.9'... \n", "3 {'answer_start': [641, 637, 654], 'text': ['In... \n", "4 {'answer_start': [57, 57, 57, 57], 'text': ['t... \n", "\n", " context \\\n", "0 Following their loss in the divisional round o... \n", "1 Manning finished the year with a career-low 67... \n", "2 Manning finished the year with a career-low 67... \n", "3 Following their loss in the divisional round o... \n", "4 Peyton Manning became the first quarterback ev... \n", "\n", " insert_date_ id title \\\n", "0 2020-10-02T07:47:11.000830 56bf301c3aeaaa14008c9550 Super_Bowl_50 \n", "1 2020-10-02T07:47:11.239195 56bf38383aeaaa14008c956c Super_Bowl_50 \n", "2 2020-10-02T07:47:11.238646 56beb57b3aeaaa14008c9279 Super_Bowl_50 \n", "3 2020-10-02T07:47:10.760423 56beb4e43aeaaa14008c9267 Super_Bowl_50 \n", "4 2020-10-02T07:47:12.428915 56d704430d65d214001982de Super_Bowl_50 \n", "\n", " _search_score \n", "0 0.641220 \n", "1 0.634783 \n", "2 0.617874 \n", "3 0.612926 \n", "4 0.611716 " ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#6 hybrid search combining traditional and nlp vector search\n", "search_results = vi_client.hybrid_search(collection_name, 'Peyton Men',\n", " text_encoder.encode('Peyton Men'),\n", " ['question_vector_'], ['question'],\n", " traditional_weight=0.015,\n", " page_size=5)\n", "vi_client.results_to_df(search_results)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "celltoolbar": "Edit Metadata", "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5" } }, "nbformat": 4, "nbformat_minor": 4 }