QuickStart
Build Image Search in 5 Minutes
[6]:
collection_name = 'pokemon_images'
# One document per pokedex number 1..19: the official artwork URL (number
# zero-padded to three digits), the number as a string, and the number as `_id`.
documents = [
    {
        'image': 'https://assets.pokemon.com/assets/cms2/img/pokedex/full/{}.png'.format(format(dex_no, '03')),
        'pokemon_id': str(dex_no),
        '_id': dex_no,
    }
    for dex_no in range(1, 20)
]
[7]:
# 1. Create the Vector AI client.
# `username`, `api_key`, `url` and `collection_name` are not assigned in this
# cell; they must already exist in the kernel namespace.
from vectorai.client import ViClient
vi_client = ViClient(username, api_key, url)
# Remove any collection already stored under this name (the recorded output
# reports it as deleted) - presumably so that re-running the notebook starts clean.
vi_client.delete_collection(collection_name)
# 2. Create the image encoder, using the same credentials and URL as the client.
from vectorai.models.deployed import ViImage2Vec
image_encoder = ViImage2Vec(username, api_key, url)
Logged in. Welcome public-demo. To view list of available collections, call list_collections() method.
[7]:
{'status': 'complete', 'message': 'pokemon_images deleted'}
[8]:
# Step 3: put the documents into the collection and get their images encoded.
# With `use_jobs` on, the plain documents are inserted and an encoding job for
# the 'image' field is started and waited on (the encoding then runs on the
# service's servers rather than on this machine). With it off, the encoder's
# `encode` function is handed to the insert call instead.
use_jobs = False
if not use_jobs:
    vi_client.insert_documents(
        collection_name,
        documents,
        models={'image': image_encoder.encode},
    )
else:
    vi_client.insert_documents(collection_name, documents)
    job = vi_client.encode_image_job(collection_name, 'image')
    vi_client.wait_till_jobs_complete(collection_name, job['job_id'], job['job_name'])
[8]:
{'inserted_successfully': 19, 'failed': 0, 'failed_document_ids': []}
[9]:
# Step 4: search the collection with an encoded query image.
# The query URL is the artwork for pokedex number 003, which is also one of
# the inserted documents.
query_vector = image_encoder.encode(
    'https://assets.pokemon.com/assets/cms2/img/pokedex/full/003.png'
)
search_results = vi_client.search(
    collection_name, query_vector, 'image_vector_', page_size=5
)
# Step 4.2: the first result is the query image itself.
vi_client.show_json(search_results, image_fields=['image'], image_width=150)
[9]:
_id | image | pokemon_id | insert_date_ | _search_score | |
---|---|---|---|---|---|
0 | 3 | 3 | 2020-10-02T07:07:18.799559 | 1.000000 | |
1 | 2 | 2 | 2020-10-02T07:07:18.797693 | 0.920337 | |
2 | 1 | 1 | 2020-10-02T07:07:18.795655 | 0.838996 | |
3 | 17 | 17 | 2020-10-02T07:07:30.587841 | 0.835111 | |
4 | 16 | 16 | 2020-10-02T07:07:30.585399 | 0.813012 |
[10]:
# 5. Recommendation by id: search on the 'image_vector_' field starting from
# the document with id '2', asking for 5 results.
# NOTE(review): the id is passed as the string '2' although the documents were
# built with integer `_id` values; the recorded output shows it resolving to _id 2.
search_by_id_results = vi_client.search_by_id(collection_name, '2', 'image_vector_', page_size=5)
# 5.2 The first result is the image of the looked-up id itself.
vi_client.show_json(search_by_id_results, image_fields=['image'], image_width=150)
[10]:
_id | image | pokemon_id | insert_date_ | _search_score | |
---|---|---|---|---|---|
0 | 2 | 2 | 2020-10-02T07:07:18.797693 | 1.000000 | |
1 | 3 | 3 | 2020-10-02T07:07:18.799559 | 0.920337 | |
2 | 1 | 1 | 2020-10-02T07:07:18.795655 | 0.895991 | |
3 | 7 | 7 | 2020-10-02T07:07:18.807199 | 0.839945 | |
4 | 17 | 17 | 2020-10-02T07:07:30.587841 | 0.833277 |
Build Audio Search in 5 Minutes
Building Audio search is easy with Vi!
[11]:
collection_name = 'audio_quickstart'
# Build one document per clip number 1..1000: the clip's URL in the S3 bucket,
# its file name, and the clip number as `_id`.
documents = [
    {
        'audio': 'https://vecsearch-bucket.s3.us-east-2.amazonaws.com/voices/common_voice_en_{}.wav'.format(clip_no),
        'name': 'common_voice_en_{}.wav'.format(clip_no),
        '_id': clip_no,
    }
    for clip_no in range(1, 1001)
]
[12]:
# 1. Create the Vector AI client.
# `username`, `api_key` and `url` are not assigned in this cell; they must
# already exist in the kernel namespace.
from vectorai.client import ViClient
vi_client = ViClient(username, api_key, url)
# Remove any collection already stored under this name (the recorded output
# reports it as deleted) - presumably so that re-running the notebook starts clean.
vi_client.delete_collection(collection_name)
# 2. Create the audio encoder, using the same credentials and URL as the client.
from vectorai.models.deployed import ViAudio2Vec
audio_encoder = ViAudio2Vec(username, api_key, url)
Logged in. Welcome public-demo. To view list of available collections, call list_collections() method.
[12]:
{'status': 'complete', 'message': 'audio_quickstart deleted'}
[13]:
# Step 3: put the documents into the collection and get their audio encoded.
# With `use_jobs` on (the setting used here), the plain documents are inserted
# and an encoding job for the 'audio' field is started and waited on. With it
# off, the encoder's `encode` function is handed to the insert call instead.
use_jobs = True
if not use_jobs:
    vi_client.insert_documents(
        collection_name,
        documents,
        models={'audio': audio_encoder.encode},
    )
else:
    vi_client.insert_documents(collection_name, documents)
    job = vi_client.encode_audio_job(collection_name, 'audio')
    vi_client.wait_till_jobs_complete(collection_name, job['job_id'], job['job_name'])
d:\kda\vectorai\vectorai\read.py:351: UserWarning: Potential issue. Cannot find a vector field. Check that the vector field is _vector_.
"Potential issue. Cannot find a vector field. Check that the vector field is _vector_."
[13]:
{'inserted_successfully': 1000, 'failed': 0, 'failed_document_ids': []}
{'status': 'Finished'}
[13]:
'Done'
[14]:
import IPython.display as ipd
# Step 4: search the collection with an encoded query clip.
# The query is the audio URL of the first document, so that document is
# expected back as the top hit.
query_vector = audio_encoder.encode(documents[0]['audio'])
search_results = vi_client.search(
    collection_name, query_vector, 'audio_vector_', page_size=5
)
vi_client.show_json(search_results, audio_fields=['audio'])
[14]:
_id | name | insert_date_ | audio | _search_score | |
---|---|---|---|---|---|
0 | 1 | common_voice_en_1.wav | 2020-10-02T07:07:34.725378 | 1.000000 | |
1 | 12 | common_voice_en_12.wav | 2020-10-02T07:07:34.726100 | 0.893219 | |
2 | 32 | common_voice_en_32.wav | 2020-10-02T07:07:35.271638 | 0.891373 | |
3 | 20 | common_voice_en_20.wav | 2020-10-02T07:07:35.046128 | 0.882336 | |
4 | 15 | common_voice_en_15.wav | 2020-10-02T07:07:34.726251 | 0.877323 |
[15]:
# 5. Recommendation by id: search on the 'audio_vector_' field starting from
# the document with id '2', asking for 5 results.
# NOTE(review): the id is passed as the string '2' although the documents were
# built with integer `_id` values; the recorded output shows it resolving to _id 2.
search_by_id_results = vi_client.search_by_id(collection_name, '2', 'audio_vector_', page_size=5)
# Render the results with the 'audio' field treated as an audio field.
vi_client.show_json(search_by_id_results, audio_fields=['audio'])
[15]:
_id | name | insert_date_ | audio | _search_score | |
---|---|---|---|---|---|
0 | 2 | common_voice_en_2.wav | 2020-10-02T07:07:34.725536 | 1.000000 | |
1 | 40 | common_voice_en_40.wav | 2020-10-02T07:07:35.272603 | 0.884632 | |
2 | 3 | common_voice_en_3.wav | 2020-10-02T07:07:34.725629 | 0.879187 | |
3 | 14 | common_voice_en_14.wav | 2020-10-02T07:07:34.726200 | 0.874556 | |
4 | 21 | common_voice_en_21.wav | 2020-10-02T07:07:35.046224 | 0.865409 |
Build Text QA Search in 5 Minutes
[16]:
# Install the `datasets` package into the kernel's environment; it is imported
# below to download SQuAD.
# NOTE(review): no version is pinned; the recorded output shows 1.0.1 installed.
%pip install datasets
Requirement already satisfied: datasets in c:\users\jacky\anaconda3\lib\site-packages (1.0.1)
Requirement already satisfied: filelock in c:\users\jacky\anaconda3\lib\site-packages (from datasets) (3.0.12)
Requirement already satisfied: dill in c:\users\jacky\anaconda3\lib\site-packages (from datasets) (0.3.1.1)
Requirement already satisfied: pyarrow>=0.17.1 in c:\users\jacky\anaconda3\lib\site-packages (from datasets) (1.0.0)
Requirement already satisfied: requests>=2.19.0 in c:\users\jacky\anaconda3\lib\site-packages (from datasets) (2.22.0)
Requirement already satisfied: numpy>=1.17 in c:\users\jacky\anaconda3\lib\site-packages (from datasets) (1.19.1)
Requirement already satisfied: pandas in c:\users\jacky\anaconda3\lib\site-packages (from datasets) (0.25.2)
Requirement already satisfied: tqdm>=4.27 in c:\users\jacky\anaconda3\lib\site-packages (from datasets) (4.36.1)
Requirement already satisfied: xxhash in c:\users\jacky\anaconda3\lib\site-packages (from datasets) (2.0.0)
Requirement already satisfied: certifi>=2017.4.17 in c:\users\jacky\anaconda3\lib\site-packages (from requests>=2.19.0->datasets) (2020.6.20)
Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in c:\users\jacky\anaconda3\lib\site-packages (from requests>=2.19.0->datasets) (1.24.2)
Requirement already satisfied: idna<2.9,>=2.5 in c:\users\jacky\anaconda3\lib\site-packages (from requests>=2.19.0->datasets) (2.8)
Requirement already satisfied: chardet<3.1.0,>=3.0.2 in c:\users\jacky\anaconda3\lib\site-packages (from requests>=2.19.0->datasets) (3.0.4)
Requirement already satisfied: pytz>=2017.2 in c:\users\jacky\anaconda3\lib\site-packages (from pandas->datasets) (2019.3)
Requirement already satisfied: python-dateutil>=2.6.1 in c:\users\jacky\anaconda3\lib\site-packages (from pandas->datasets) (2.8.0)
Requirement already satisfied: six>=1.5 in c:\users\jacky\anaconda3\lib\site-packages (from python-dateutil>=2.6.1->pandas->datasets) (1.12.0)
Note: you may need to restart the kernel to use updated packages.
[ ]:
collection_name = 'squad'
# Download SQuAD through Hugging Face's `datasets` library.
import datasets
squad_dataset = datasets.load_dataset('squad')
# One document per validation example: the example's own fields plus a string
# `_id` equal to its position in the split.
documents = [{'_id': str(n), **d} for n, d in enumerate(squad_dataset['validation'])]
# No delete_collection() call here. The client-setup cell that follows creates
# `vi_client` and deletes this collection itself; calling it in this cell only
# worked when a client from an earlier section was still in the kernel.
[1]:
# 1. Create the Vector AI client.
# `username`, `api_key` and `url` are not assigned in this cell; they must
# already exist in the kernel namespace.
from vectorai.client import ViClient
vi_client = ViClient(username, api_key, url)
# Remove any collection already stored under this name - presumably so that
# re-running the notebook starts clean.
vi_client.delete_collection(collection_name)
# 2. Create the text encoder.
# NOTE(review): the endpoint is the hard-coded 'https://api.vctr.ai' here, while
# the client above (and the image/audio encoders) use `url` - confirm whether
# these are meant to differ.
from vectorai.models.deployed import ViText2Vec
text_encoder = ViText2Vec(username, api_key, 'https://api.vctr.ai')
[20]:
# Step 3: put the documents into the collection and get their questions encoded.
# With `use_jobs` on (the setting used here), the plain documents are inserted
# and an encoding job for the 'question' field is started and waited on. With
# it off, the encoder object itself (not its `encode` function) is handed to
# the insert call together with use_bulk_encode=True.
use_jobs = True
if not use_jobs:
    vi_client.insert_documents(
        collection_name,
        documents,
        models={'question': text_encoder},
        use_bulk_encode=True,
    )
else:
    vi_client.insert_documents(collection_name, documents)
    job = vi_client.encode_text_job(collection_name, 'question')
    vi_client.wait_till_jobs_complete(collection_name, job['job_id'], job['job_name'])
d:\kda\vectorai\vectorai\read.py:351: UserWarning: Potential issue. Cannot find a vector field. Check that the vector field is _vector_.
"Potential issue. Cannot find a vector field. Check that the vector field is _vector_."
[20]:
{'inserted_successfully': 10570, 'failed': 0, 'failed_document_ids': []}
{'status': 'Finished'}
[20]:
'Done'
[21]:
# Step 4: search the collection with an encoded free-text query.
query_vector = text_encoder.encode('who was the winner for nfl fifty')
search_results = vi_client.search(
    collection_name, query_vector, 'question_vector_', page_size=5
)
# Step 4.2: the query string is not one of the stored questions, so the first
# result is the closest stored question rather than the query itself (the
# recorded top score is below 1.0).
vi_client.results_to_df(search_results)
[21]:
_id | question | answers | context | insert_date_ | id | title | _search_score | |
---|---|---|---|---|---|---|---|---|
0 | 11 | Who won Super Bowl 50? | {'answer_start': [177, 177, 177], 'text': ['De... | Super Bowl 50 was an American football game to... | 2020-10-02T07:47:06.947313 | 56beace93aeaaa14008c91df | Super_Bowl_50 | 0.798744 |
1 | 24 | Who won Super Bowl 50? | {'answer_start': [177, 177, 177], 'text': ['De... | Super Bowl 50 was an American football game to... | 2020-10-02T07:47:07.285182 | 56d20362e7d4791d009025eb | Super_Bowl_50 | 0.798744 |
2 | 3 | Which NFL team won Super Bowl 50? | {'answer_start': [177, 177, 177], 'text': ['De... | Super Bowl 50 was an American football game to... | 2020-10-02T07:47:06.946694 | 56be4db0acb8001400a502ef | Super_Bowl_50 | 0.763209 |
3 | 55 | Who was the Super Bowl 50 MVP? | {'answer_start': [248, 248, 252], 'text': ['Vo... | The Broncos took an early lead in Super Bowl 5... | 2020-10-02T07:47:07.759154 | 56be4eafacb8001400a50302 | Super_Bowl_50 | 0.754090 |
4 | 26 | Which team won Super Bowl 50. | {'answer_start': [177, 177, 177], 'text': ['De... | Super Bowl 50 was an American football game to... | 2020-10-02T07:47:07.285403 | 56d600e31c85041400946eb0 | Super_Bowl_50 | 0.742759 |
[22]:
# 5. Recommendation by id: search on the 'question_vector_' field starting from
# the `_id` of the document at index 50 of `documents`, asking for 5 results.
search_by_id_results = vi_client.search_by_id(collection_name, documents[50]['_id'], 'question_vector_', page_size=5)
# 5.2 The first result is the looked-up document's own question.
vi_client.results_to_df(search_by_id_results)
[22]:
_id | question | answers | context | insert_date_ | id | title | _search_score | |
---|---|---|---|---|---|---|---|---|
0 | 50 | Who did Denver beat in the 2015 AFC Championsh... | {'answer_start': [372, 368, 372], 'text': ['Ne... | The Panthers finished the regular season with ... | 2020-10-02T07:47:07.758767 | 56d6017d1c85041400946ec1 | Super_Bowl_50 | 1.000000 |
1 | 48 | Who did Denver beat in the AFC championship? | {'answer_start': [372, 368, 372], 'text': ['Ne... | The Panthers finished the regular season with ... | 2020-10-02T07:47:07.758541 | 56d2045de7d4791d009025f6 | Super_Bowl_50 | 0.960072 |
2 | 331 | Who did the Broncos beat to win their division... | {'answer_start': [25, 25, 36], 'text': ['Pitts... | The Broncos defeated the Pittsburgh Steelers i... | 2020-10-02T07:47:12.209038 | 56d99f99dc89441400fdb628 | Super_Bowl_50 | 0.923735 |
3 | 330 | Who did the Broncos defeat in the AFC Champion... | {'answer_start': [192, 192, 204], 'text': ['Ne... | The Broncos defeated the Pittsburgh Steelers i... | 2020-10-02T07:47:12.208876 | 56d7018a0d65d214001982c5 | Super_Bowl_50 | 0.915792 |
4 | 328 | Who did the Broncos beat in the divisional game? | {'answer_start': [25, 21, 36], 'text': ['Pitts... | The Broncos defeated the Pittsburgh Steelers i... | 2020-10-02T07:47:11.956089 | 56d7018a0d65d214001982c2 | Super_Bowl_50 | 0.906187 |
[23]:
# Step 6: hybrid search, combining traditional and NLP vector search.
# The same partial query string is passed both as raw text and in encoded form.
# The two lists are presumably the vector field(s) and the text field(s) to
# search - confirm against the client's hybrid_search signature.
query_text = 'Peyton Men'
search_results = vi_client.hybrid_search(
    collection_name,
    query_text,
    text_encoder.encode(query_text),
    ['question_vector_'],
    ['question'],
    traditional_weight=0.015,
    page_size=5,
)
vi_client.results_to_df(search_results)
[23]:
_id | question | answers | context | insert_date_ | id | title | _search_score | |
---|---|---|---|---|---|---|---|---|
0 | 258 | How old was Peyton Manning in 2015? | {'answer_start': [817, 817, 817], 'text': ['39... | Following their loss in the divisional round o... | 2020-10-02T07:47:11.000830 | 56bf301c3aeaaa14008c9550 | Super_Bowl_50 | 0.641220 |
1 | 276 | How may yards did Peyton Manning throw? | {'answer_start': [77, 77, 77], 'text': ['2,249... | Manning finished the year with a career-low 67... | 2020-10-02T07:47:11.239195 | 56bf38383aeaaa14008c956c | Super_Bowl_50 | 0.634783 |
2 | 270 | What was Peyton Manning's passer rating for th... | {'answer_start': [44, 44, 44], 'text': ['67.9'... | Manning finished the year with a career-low 67... | 2020-10-02T07:47:11.238646 | 56beb57b3aeaaa14008c9279 | Super_Bowl_50 | 0.617874 |
3 | 252 | Who did Peyton Manning play for as a rookie? | {'answer_start': [641, 637, 654], 'text': ['In... | Following their loss in the divisional round o... | 2020-10-02T07:47:10.760423 | 56beb4e43aeaaa14008c9267 | Super_Bowl_50 | 0.612926 |
4 | 356 | Peyton Manning took how many different teams t... | {'answer_start': [57, 57, 57, 57], 'text': ['t... | Peyton Manning became the first quarterback ev... | 2020-10-02T07:47:12.428915 | 56d704430d65d214001982de | Super_Bowl_50 | 0.611716 |
[ ]: