# Understanding Embeddings and Representations in AI
#
# Tokenization and Embeddings
from gensim.models import Word2Vec

# Sample dataset: a tiny pre-tokenized corpus (list of sentences, each a
# list of word tokens) — the input format gensim's Word2Vec expects.
# NOTE(review): with only 3 short sentences the learned vectors are
# essentially random; this is for demonstration, not meaningful training.
sentences = [
    ["I", "love", "AI"],
    ["AI", "is", "amazing"],
    ["Embeddings", "capture", "meaning"]
]

# Function to train Word2Vec model
def train_word2vec(sentences, vector_size=10, window=5, min_count=1, workers=4,
                   sg=0, epochs=5, seed=1):
    """Train a Word2Vec model on a pre-tokenized corpus.

    Args:
        sentences: Iterable of sentences, each a list of string tokens.
        vector_size: Dimensionality of the learned word vectors.
        window: Maximum distance between the current and predicted word.
        min_count: Ignore words with total frequency below this value.
        workers: Number of worker threads for training.
        sg: Training algorithm — 0 for CBOW (gensim's default), 1 for skip-gram.
        epochs: Number of passes over the corpus (gensim's default is 5).
        seed: Random seed for vector initialization (gensim's default is 1).
            NOTE: with workers > 1, training is still non-deterministic
            because of OS thread scheduling; use workers=1 for full
            reproducibility.

    Returns:
        The trained gensim Word2Vec model.
    """
    model = Word2Vec(
        sentences,
        vector_size=vector_size,
        window=window,
        min_count=min_count,
        workers=workers,
        sg=sg,
        epochs=epochs,
        seed=seed,
    )
    return model

# Train the Word2Vec model on the sample corpus defined above,
# using the helper's default hyperparameters (vector_size=10, window=5, ...).
model = train_word2vec(sentences)

# Function to get the embedding of a word
def get_word_embedding(model, word):
    """Look up *word*'s vector in a trained Word2Vec model.

    Args:
        model: A trained gensim Word2Vec model (anything exposing ``wv``
            with ``in`` and ``[]`` lookup).
        word: The token whose embedding to retrieve.

    Returns:
        The vector stored for *word* in ``model.wv``.

    Raises:
        ValueError: If *word* is not present in the model's vocabulary.
    """
    # Guard clause: reject out-of-vocabulary words with a clear error.
    if word not in model.wv:
        raise ValueError(f"The word '{word}' is not in the vocabulary.")
    return model.wv[word]

# Get vector representation of "AI"
try:
    ai_embedding = get_word_embedding(model, "AI")
    print("AI Embedding:", ai_embedding)
except ValueError as e:
    print(e)