QuickStart

Build Image Search in 5 Minutes

[6]:
# collection that will hold the pokemon image documents
collection_name = 'pokemon_images'

# one document per number 1..19; the image url embeds the number zero-padded to 3 digits
documents = [
    {
        'image': 'https://assets.pokemon.com/assets/cms2/img/pokedex/full/{}.png'.format(str(pokedex_no).zfill(3)),
        'pokemon_id': str(pokedex_no),
        '_id': pokedex_no,
    }
    for pokedex_no in range(1, 20)
]
[7]:
# client and deployed image encoder used throughout this section
from vectorai.client import ViClient
from vectorai.models.deployed import ViImage2Vec

#1. specify the vdb client, then delete the collection with this name
vi_client = ViClient(username, api_key, url)
vi_client.delete_collection(collection_name)

#2. specify an image encoder (same credentials and url as the client)
image_encoder = ViImage2Vec(username, api_key, url)
Logged in. Welcome public-demo. To view list of available collections, call list_collections() method.
[7]:
{'status': 'complete', 'message': 'pokemon_images deleted'}
[8]:
#3. insert the documents and encode images simultaneously
# with jobs, the encoding takes place on the servers instead of on your computer
use_jobs = False

if not use_jobs:
    # pass the encoder so the 'image' field is encoded as part of the insert call
    vi_client.insert_documents(collection_name, documents, models={'image': image_encoder.encode})
else:
    # insert first, then start an image-encoding job and wait for it to complete
    vi_client.insert_documents(collection_name, documents)
    job = vi_client.encode_image_job(collection_name, 'image')
    vi_client.wait_till_jobs_complete(collection_name, job['job_id'], job['job_name'])

[8]:
{'inserted_successfully': 19, 'failed': 0, 'failed_document_ids': []}
[9]:
#4. search
# encode the query image, then search the 'image_vector_' field for the 5 closest documents
query_vector = image_encoder.encode('https://assets.pokemon.com/assets/cms2/img/pokedex/full/003.png')
search_results = vi_client.search(collection_name, query_vector, 'image_vector_', page_size=5)

#4.2 first result is the query image itself
vi_client.show_json(search_results, image_fields=['image'], image_width=150)
[9]:
_id image pokemon_id insert_date_ _search_score
0 3 3 2020-10-02T07:07:18.799559 1.000000
1 2 2 2020-10-02T07:07:18.797693 0.920337
2 1 1 2020-10-02T07:07:18.795655 0.838996
3 17 17 2020-10-02T07:07:30.587841 0.835111
4 16 16 2020-10-02T07:07:30.585399 0.813012
[10]:
#5 recommendation by id
query_id = '2'
search_by_id_results = vi_client.search_by_id(collection_name, query_id, 'image_vector_', page_size=5)

#5.2 first result is the image of the queried id itself
vi_client.show_json(search_by_id_results, image_fields=['image'], image_width=150)
[10]:
_id image pokemon_id insert_date_ _search_score
0 2 2 2020-10-02T07:07:18.797693 1.000000
1 3 3 2020-10-02T07:07:18.799559 0.920337
2 1 1 2020-10-02T07:07:18.795655 0.895991
3 7 7 2020-10-02T07:07:18.807199 0.839945
4 17 17 2020-10-02T07:07:30.587841 0.833277

Build Audio Search in 5 Minutes

Building Audio search is easy with Vi!

[11]:
# collection that will hold the audio documents
collection_name = 'audio_quickstart'

# create the documents: one per clip number 1..1000, each with its url, file name and id
documents = [
    {
        'audio': 'https://vecsearch-bucket.s3.us-east-2.amazonaws.com/voices/common_voice_en_{}.wav'.format(clip_no),
        'name': 'common_voice_en_{}.wav'.format(clip_no),
        '_id': clip_no,
    }
    for clip_no in range(1, 1001)
]
[12]:
# client and deployed audio encoder used throughout this section
from vectorai.client import ViClient
from vectorai.models.deployed import ViAudio2Vec

#1. specify the vdb client, then delete the collection with this name
vi_client = ViClient(username, api_key, url)
vi_client.delete_collection(collection_name)

#2. specify an audio encoder (same credentials and url as the client)
audio_encoder = ViAudio2Vec(username, api_key, url)
Logged in. Welcome public-demo. To view list of available collections, call list_collections() method.
[12]:
{'status': 'complete', 'message': 'audio_quickstart deleted'}
[13]:
#3. insert the documents and encode audio simultaneously
use_jobs = True

if not use_jobs:
    # pass the encoder so the 'audio' field is encoded as part of the insert call
    vi_client.insert_documents(collection_name, documents, models={'audio': audio_encoder.encode})
else:
    # insert first, then start an audio-encoding job and wait for it to complete
    vi_client.insert_documents(collection_name, documents)
    job = vi_client.encode_audio_job(collection_name, 'audio')
    vi_client.wait_till_jobs_complete(collection_name, job['job_id'], job['job_name'])
d:\kda\vectorai\vectorai\read.py:351: UserWarning: Potential issue. Cannot find a vector field. Check that the vector field is _vector_.
  "Potential issue. Cannot find a vector field. Check that the vector field is _vector_."

[13]:
{'inserted_successfully': 1000, 'failed': 0, 'failed_document_ids': []}
{'status': 'Finished'}
[13]:
'Done'
[14]:
# NOTE(review): ipd is not referenced in this cell; presumably kept for audio display - confirm
import IPython.display as ipd

#4. search, using the first document's audio as the query
query_audio_url = documents[0]['audio']
query_vector = audio_encoder.encode(query_audio_url)
search_results = vi_client.search(collection_name, query_vector, 'audio_vector_', page_size=5)

# show the results, passing 'audio' as an audio field
vi_client.show_json(search_results, audio_fields=['audio'])
[14]:
_id name insert_date_ audio _search_score
0 1 common_voice_en_1.wav 2020-10-02T07:07:34.725378 1.000000
1 12 common_voice_en_12.wav 2020-10-02T07:07:34.726100 0.893219
2 32 common_voice_en_32.wav 2020-10-02T07:07:35.271638 0.891373
3 20 common_voice_en_20.wav 2020-10-02T07:07:35.046128 0.882336
4 15 common_voice_en_15.wav 2020-10-02T07:07:34.726251 0.877323
[15]:
#5 recommendation by id
query_id = '2'
search_by_id_results = vi_client.search_by_id(collection_name, query_id, 'audio_vector_', page_size=5)

# show the results, passing 'audio' as an audio field
vi_client.show_json(search_by_id_results, audio_fields=['audio'])
[15]:
_id name insert_date_ audio _search_score
0 2 common_voice_en_2.wav 2020-10-02T07:07:34.725536 1.000000
1 40 common_voice_en_40.wav 2020-10-02T07:07:35.272603 0.884632
2 3 common_voice_en_3.wav 2020-10-02T07:07:34.725629 0.879187
3 14 common_voice_en_14.wav 2020-10-02T07:07:34.726200 0.874556
4 21 common_voice_en_21.wav 2020-10-02T07:07:35.046224 0.865409

Build Text QA Search in 5 Minutes

[16]:
%pip install datasets
Requirement already satisfied: datasets in c:\users\jacky\anaconda3\lib\site-packages (1.0.1)
Requirement already satisfied: filelock in c:\users\jacky\anaconda3\lib\site-packages (from datasets) (3.0.12)
Requirement already satisfied: dill in c:\users\jacky\anaconda3\lib\site-packages (from datasets) (0.3.1.1)
Requirement already satisfied: pyarrow>=0.17.1 in c:\users\jacky\anaconda3\lib\site-packages (from datasets) (1.0.0)
Requirement already satisfied: requests>=2.19.0 in c:\users\jacky\anaconda3\lib\site-packages (from datasets) (2.22.0)
Requirement already satisfied: numpy>=1.17 in c:\users\jacky\anaconda3\lib\site-packages (from datasets) (1.19.1)
Requirement already satisfied: pandas in c:\users\jacky\anaconda3\lib\site-packages (from datasets) (0.25.2)
Requirement already satisfied: tqdm>=4.27 in c:\users\jacky\anaconda3\lib\site-packages (from datasets) (4.36.1)
Requirement already satisfied: xxhash in c:\users\jacky\anaconda3\lib\site-packages (from datasets) (2.0.0)
Requirement already satisfied: certifi>=2017.4.17 in c:\users\jacky\anaconda3\lib\site-packages (from requests>=2.19.0->datasets) (2020.6.20)
Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in c:\users\jacky\anaconda3\lib\site-packages (from requests>=2.19.0->datasets) (1.24.2)
Requirement already satisfied: idna<2.9,>=2.5 in c:\users\jacky\anaconda3\lib\site-packages (from requests>=2.19.0->datasets) (2.8)
Requirement already satisfied: chardet<3.1.0,>=3.0.2 in c:\users\jacky\anaconda3\lib\site-packages (from requests>=2.19.0->datasets) (3.0.4)
Requirement already satisfied: pytz>=2017.2 in c:\users\jacky\anaconda3\lib\site-packages (from pandas->datasets) (2019.3)
Requirement already satisfied: python-dateutil>=2.6.1 in c:\users\jacky\anaconda3\lib\site-packages (from pandas->datasets) (2.8.0)
Requirement already satisfied: six>=1.5 in c:\users\jacky\anaconda3\lib\site-packages (from python-dateutil>=2.6.1->pandas->datasets) (1.12.0)
Note: you may need to restart the kernel to use updated packages.
[ ]:
# collection that will hold the squad question documents
collection_name = 'squad'

# use huggingface's datasets library to download squad
import datasets
squad_dataset = datasets.load_dataset('squad')

# one document per validation example: its own fields plus a string '_id' taken from its position
documents = [{'_id': str(n), **d} for n, d in enumerate(squad_dataset['validation'])]

# the collection is deleted in the next cell, once the client has been created;
# deleting here would rely on a vi_client left over from an earlier section
[1]:
# client and deployed text encoder used throughout this section
from vectorai.client import ViClient
from vectorai.models.deployed import ViText2Vec

#1. specify the vdb client, then delete the collection with this name
vi_client = ViClient(username, api_key, url)
vi_client.delete_collection(collection_name)

#2. specify a text encoder
# NOTE(review): the endpoint is a literal here while the client uses `url` - confirm this is intended
text_encoder = ViText2Vec(username, api_key, 'https://api.vctr.ai')
[20]:
#3. insert the documents and encode text simultaneously
use_jobs = True

if not use_jobs:
    # pass the encoder object itself (not its .encode method) together with use_bulk_encode=True
    vi_client.insert_documents(collection_name, documents, models={'question': text_encoder}, use_bulk_encode=True)
else:
    # insert first, then start a text-encoding job on 'question' and wait for it to complete
    vi_client.insert_documents(collection_name, documents)
    job = vi_client.encode_text_job(collection_name, 'question')
    vi_client.wait_till_jobs_complete(collection_name, job['job_id'], job['job_name'])
d:\kda\vectorai\vectorai\read.py:351: UserWarning: Potential issue. Cannot find a vector field. Check that the vector field is _vector_.
  "Potential issue. Cannot find a vector field. Check that the vector field is _vector_."

[20]:
{'inserted_successfully': 10570, 'failed': 0, 'failed_document_ids': []}
{'status': 'Finished'}
[20]:
'Done'
[21]:
#4. search
# encode the free-text query, then search the 'question_vector_' field for the 5 closest documents
query_vector = text_encoder.encode('who was the winner for nfl fifty')
search_results = vi_client.search(collection_name, query_vector, 'question_vector_', page_size=5)

#4.2 the query is free text, not a stored document, so the top hit is the most similar question rather than the query itself
vi_client.results_to_df(search_results)
[21]:
_id question answers context insert_date_ id title _search_score
0 11 Who won Super Bowl 50? {'answer_start': [177, 177, 177], 'text': ['De... Super Bowl 50 was an American football game to... 2020-10-02T07:47:06.947313 56beace93aeaaa14008c91df Super_Bowl_50 0.798744
1 24 Who won Super Bowl 50? {'answer_start': [177, 177, 177], 'text': ['De... Super Bowl 50 was an American football game to... 2020-10-02T07:47:07.285182 56d20362e7d4791d009025eb Super_Bowl_50 0.798744
2 3 Which NFL team won Super Bowl 50? {'answer_start': [177, 177, 177], 'text': ['De... Super Bowl 50 was an American football game to... 2020-10-02T07:47:06.946694 56be4db0acb8001400a502ef Super_Bowl_50 0.763209
3 55 Who was the Super Bowl 50 MVP? {'answer_start': [248, 248, 252], 'text': ['Vo... The Broncos took an early lead in Super Bowl 5... 2020-10-02T07:47:07.759154 56be4eafacb8001400a50302 Super_Bowl_50 0.754090
4 26 Which team won Super Bowl 50. {'answer_start': [177, 177, 177], 'text': ['De... Super Bowl 50 was an American football game to... 2020-10-02T07:47:07.285403 56d600e31c85041400946eb0 Super_Bowl_50 0.742759
[22]:
#5 recommendation by id
# use the '_id' of the document at index 50 as the query
query_id = documents[50]['_id']
search_by_id_results = vi_client.search_by_id(collection_name, query_id, 'question_vector_', page_size=5)

#5.2 first result is the id's text itself
vi_client.results_to_df(search_by_id_results)
[22]:
_id question answers context insert_date_ id title _search_score
0 50 Who did Denver beat in the 2015 AFC Championsh... {'answer_start': [372, 368, 372], 'text': ['Ne... The Panthers finished the regular season with ... 2020-10-02T07:47:07.758767 56d6017d1c85041400946ec1 Super_Bowl_50 1.000000
1 48 Who did Denver beat in the AFC championship? {'answer_start': [372, 368, 372], 'text': ['Ne... The Panthers finished the regular season with ... 2020-10-02T07:47:07.758541 56d2045de7d4791d009025f6 Super_Bowl_50 0.960072
2 331 Who did the Broncos beat to win their division... {'answer_start': [25, 25, 36], 'text': ['Pitts... The Broncos defeated the Pittsburgh Steelers i... 2020-10-02T07:47:12.209038 56d99f99dc89441400fdb628 Super_Bowl_50 0.923735
3 330 Who did the Broncos defeat in the AFC Champion... {'answer_start': [192, 192, 204], 'text': ['Ne... The Broncos defeated the Pittsburgh Steelers i... 2020-10-02T07:47:12.208876 56d7018a0d65d214001982c5 Super_Bowl_50 0.915792
4 328 Who did the Broncos beat in the divisional game? {'answer_start': [25, 21, 36], 'text': ['Pitts... The Broncos defeated the Pittsburgh Steelers i... 2020-10-02T07:47:11.956089 56d7018a0d65d214001982c2 Super_Bowl_50 0.906187
[23]:
#6 hybrid search combining traditional and nlp vector search
# the same query text is passed both as raw text and as an encoded vector
query_text = 'Peyton Men'
search_results = vi_client.hybrid_search(
    collection_name,
    query_text,
    text_encoder.encode(query_text),
    ['question_vector_'],
    ['question'],
    traditional_weight=0.015,
    page_size=5,
)
vi_client.results_to_df(search_results)
[23]:
_id question answers context insert_date_ id title _search_score
0 258 How old was Peyton Manning in 2015? {'answer_start': [817, 817, 817], 'text': ['39... Following their loss in the divisional round o... 2020-10-02T07:47:11.000830 56bf301c3aeaaa14008c9550 Super_Bowl_50 0.641220
1 276 How may yards did Peyton Manning throw? {'answer_start': [77, 77, 77], 'text': ['2,249... Manning finished the year with a career-low 67... 2020-10-02T07:47:11.239195 56bf38383aeaaa14008c956c Super_Bowl_50 0.634783
2 270 What was Peyton Manning's passer rating for th... {'answer_start': [44, 44, 44], 'text': ['67.9'... Manning finished the year with a career-low 67... 2020-10-02T07:47:11.238646 56beb57b3aeaaa14008c9279 Super_Bowl_50 0.617874
3 252 Who did Peyton Manning play for as a rookie? {'answer_start': [641, 637, 654], 'text': ['In... Following their loss in the divisional round o... 2020-10-02T07:47:10.760423 56beb4e43aeaaa14008c9267 Super_Bowl_50 0.612926
4 356 Peyton Manning took how many different teams t... {'answer_start': [57, 57, 57, 57], 'text': ['t... Peyton Manning became the first quarterback ev... 2020-10-02T07:47:12.428915 56d704430d65d214001982de Super_Bowl_50 0.611716
[ ]: