# Example: Build a vocabulary from a list of sentences
def build_vocab(sentences):
    """
    Build vocabulary dictionaries mapping words to indices and indices to words.
    """
    # Split each sentence on whitespace and flatten into one token list
    tokens = [token for sentence in sentences for token in sentence.split()]
    # Sort the unique tokens so the word-to-index assignment is deterministic
    vocab = sorted(set(tokens))
    word_to_ix = {word: i for i, word in enumerate(vocab)}
    ix_to_word = {i: word for word, i in word_to_ix.items()}
    return word_to_ix, ix_to_word
# Sample text data (in a real-world scenario this would come from your dataset)
text_data = [
"hello how are you",
"i am fine thank you",
"what are you doing",
"i am building an AI model"
]
# Build the vocab dictionaries from the given text data
word_to_ix, ix_to_word = build_vocab(text_data)
# Define the initial prompt to start the conversation
initial_prompt = "AI: Hello, how can I help you today?"
# Convert the prompt into a list of indices; tokens that are not in the
# vocabulary (e.g. "AI:" or "Hello,», which differ in casing/punctuation
# from the lowercase training text) are skipped by the membership check
prompt_indices = [word_to_ix[word] for word in initial_prompt.split() if word in word_to_ix]
print("Word to Index: ", word_to_ix)
print("Index to Word: ", ix_to_word)
print("Initial Prompt Indices: ", prompt_indices)