alireza
2025-02-22 18:27:43 +03:30
parent 8820b64c3e
commit f3f7fcda8a


@@ -217,16 +217,18 @@ def sequence_to_words(sequence, N=12):
     words = [''.join(bits[i:i + N]) for i in range(0, len(bits), N)]
     return words

-def calculate_energy(sequences, batch_size=32):
+def calculate_energy(sequences, batch_size=32, h=0.1):
     """
-    Calculate the energy of sequences using batched processing.
-    Returns energies and Hamiltonian matrix.
+    Calculate the energy of sequences using batched processing with magnetic field.
+    Returns energies and weight matrix W.
+    h: magnetic field strength
     """
     num_sequences = len(sequences)
     seq_length = sequences[0].shape[0]

-    # Initialize Hamiltonian matrix
-    hamiltonian = np.zeros((seq_length, seq_length))
+    # Initialize weight matrix and magnetic field
+    W = np.zeros((seq_length, seq_length))
+    h_field = h * np.ones(seq_length).reshape(-1, 1)  # Uniform magnetic field
     energies = []

     # Process sequences in batches
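For reference, the energy form this commit introduces is the standard Ising/Hopfield expression E(s) = -1/2 * s^T W s - h^T s. Below is a minimal standalone check of that formula; the values are toy ones (and use +/-1 spins for easy hand-checking, whereas the repo encodes words as 0/1 bit vectors), not the repo's data:

import numpy as np

# Toy check of the energy form introduced above: E = -1/2 * s^T W s - h^T s.
s = np.array([1, -1, 1, 1], dtype=float).reshape(-1, 1)
W = s @ s.T                              # Hebbian outer product of one stored pattern
h_field = 0.1 * np.ones_like(s)          # uniform field, h = 0.1

energy = -0.5 * (s.T @ W @ s).item() - (h_field.T @ s).item()
print(energy)  # s^T W s = (s^T s)^2 = 16 and sum(s) = 2, so E = -8 - 0.2 = -8.2
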
@@ -234,37 +236,42 @@ def calculate_energy(sequences, batch_size=32):
         batch = sequences[i:min(i + batch_size, num_sequences)]
         batch = np.array(batch)  # Convert batch to numpy array
-        # Calculate batch energies
-        batch_energies = np.sum(batch * batch.transpose(0, 2, 1), axis=(1, 2)) / -2
+        # Calculate batch contribution to weight matrix (Hebbian learning)
+        for seq in batch:
+            W += np.dot(seq, seq.T)
+        # Calculate batch energies including magnetic field
+        batch_energies = []
+        for seq in batch:
+            # E = -1/2 * s^T * W * s - h * sum(s)
+            # Properly extract scalar values from matrix multiplications
+            spin_spin_matrix = seq.T.dot(W).dot(seq)
+            spin_spin = -0.5 * float(spin_spin_matrix[0, 0])
+            magnetic_matrix = h_field.T.dot(seq)
+            magnetic = -float(magnetic_matrix[0, 0])
+            energy = spin_spin + magnetic
+            batch_energies.append(energy)
         energies.extend(batch_energies)
-        # Update Hamiltonian
-        batch_hamiltonian = np.sum(np.matmul(batch, batch.transpose(0, 2, 1)), axis=0)
-        hamiltonian += batch_hamiltonian
         # Free memory
         del batch
         del batch_energies
-        del batch_hamiltonian
-    # Normalize Hamiltonian
-    hamiltonian = hamiltonian / num_sequences
-    return np.array(energies), hamiltonian
+    # Normalize weight matrix
+    W = W / num_sequences
+    return np.array(energies), W, h_field
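Note that the new per-sequence energy loop is quadratic in sequence length and runs in pure Python. If it ever becomes a bottleneck, the same numbers can be computed in one einsum call. A hedged sketch under the diff's shape conventions (batch is (B, L, 1), W is (L, L), h_field is (L, 1)), with made-up toy data:

import numpy as np

# Vectorized equivalent of the per-sequence energy loop above (a sketch,
# not the committed code). Toy values below are invented.
B, L = 3, 8
rng = np.random.default_rng(0)
batch = rng.integers(0, 2, size=(B, L, 1)).astype(float)
W = np.zeros((L, L))
for seq in batch:                       # same Hebbian update as the diff
    W += np.dot(seq, seq.T)
h_field = 0.1 * np.ones((L, 1))

S = batch[:, :, 0]                      # flatten to shape (B, L)
spin_spin = -0.5 * np.einsum('bi,ij,bj->b', S, W, S)   # -1/2 * s^T W s per sequence
magnetic = -(S @ h_field).ravel()                      # -h^T s per sequence
batch_energies = spin_spin + magnetic   # one energy per sequence, no Python loop
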
-def retrieve_sequences(sequences, partial_sequence, vocab, W, M=10, N=12, temperature=1.0):
+def retrieve_sequences(sequences, partial_sequence, vocab, W, M=10, N=12, temperature=1.0, h=0.1):
     """
-    Retrieve the most likely next word using Ising Hamiltonian with temperature.
-    Uses associative memory to retrieve the last word of the sequence.
+    Retrieve the most likely next word using Ising Hamiltonian with magnetic field.
     """
     # Convert partial sequence to vector
     partial_vec = np.array([int(bit) for bit in partial_sequence]).reshape(-1, 1)
     # Get all possible words from vocabulary
     possible_words = list(vocab.values())
-    # Calculate weights matrix (Hebbian learning)
-    # Calculate energies for all possible words
+    # Create magnetic field
+    h_field = h * np.ones(M * N).reshape(-1, 1)
+    # Calculate energies for all possible completions
     word_energies = []
     for word in possible_words:
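For context on the scoring loop that follows: each candidate completion is the already-known partial bits concatenated with one vocabulary word's N-bit code, flattened into an M*N column vector. A toy sketch of that assembly (M, N, and the bit strings are invented for illustration):

import numpy as np

# Toy sketch of the completion assembled in the loop below.
M, N = 2, 4
partial_sequence = '1010'            # (M-1) words of N bits already known
word = '0110'                        # one candidate word code from vocab
complete_sequence = partial_sequence + word
complete_vec = np.array([int(bit) for bit in complete_sequence]).reshape(M * N, 1)
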
@@ -273,29 +280,36 @@ def retrieve_sequences(sequences, partial_sequence, vocab, W, M=10, N=12, temper
         if len(complete_sequence) == M*N:  # Ensure correct length
             complete_vec = np.array([int(bit) for bit in complete_sequence]).reshape(M * N, 1)
-            # Calculate energy using Ising Hamiltonian
-            energy_matrix = complete_vec.T.dot(W).dot(complete_vec)
-            energy = float(energy_matrix[0, 0])
-            word_energies.append((word, energy))
+            # Calculate energy with both interaction and magnetic field terms
+            spin_spin = 0
+            for seq in sequences:
+                # Properly extract scalar from matrix multiplication
+                overlap_matrix = complete_vec.T.dot(seq)
+                overlap = overlap_matrix[0, 0]  # Extract single scalar value
+                spin_spin -= overlap * overlap
+            # Extract scalar from magnetic field contribution
+            magnetic_matrix = h_field.T.dot(complete_vec)
+            magnetic = -float(magnetic_matrix[0, 0])
+            total_energy = spin_spin + magnetic
+            word_energies.append((word, total_energy))
     # Sort by energy
     word_energies.sort(key=lambda x: x[1])
-    # Normalize energies to prevent overflow
+    # Normalize energies
     energies = np.array([e[1] for e in word_energies])
-    energies = energies - np.min(energies)  # Shift to make minimum energy 0
-    energies = energies / np.max(energies) if np.max(energies) > 0 else energies  # Scale to [0,1]
+    energies = energies - np.min(energies)
+    max_energy = np.max(energies)
+    if max_energy > 0:
+        energies = energies / max_energy
-    # Calculate probabilities with normalized energies
+    # Calculate probabilities with Boltzmann distribution
     probabilities = np.exp(-energies/temperature)
     probabilities = probabilities / np.sum(probabilities)
+    # Check for valid probabilities
+    if np.any(np.isnan(probabilities)):
+        # Fallback to uniform distribution if numerical issues occur
+        probabilities = np.ones(len(word_energies)) / len(word_energies)
     # Sample from distribution
     selected_idx = np.random.choice(len(word_energies), p=probabilities)
     best_word, min_energy = word_energies[selected_idx]
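The shift/scale/softmax/fallback logic above is self-contained enough to factor into a helper. A sketch of the same sampling rule as a standalone function (the helper itself is hypothetical, not part of this commit):

import numpy as np

def sample_boltzmann(energies, temperature=1.0, rng=None):
    """Sample an index with probability proportional to exp(-E/T).

    Mirrors the normalization and fallback in retrieve_sequences above.
    """
    rng = rng or np.random.default_rng()
    e = np.asarray(energies, dtype=float)
    e = e - e.min()                      # shift minimum energy to 0
    if e.max() > 0:
        e = e / e.max()                  # scale to [0, 1] to tame exp()
    p = np.exp(-e / temperature)
    p = p / p.sum()
    if np.any(np.isnan(p)):              # numerical fallback: uniform distribution
        p = np.ones_like(e) / len(e)
    return rng.choice(len(e), p=p)

idx = sample_boltzmann([-8.2, -7.9, -5.0], temperature=1.0)
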
@@ -340,9 +354,10 @@ def predict_sequence(initial_sequence, vocab, sequences, W, D=10, M=100, N=12, t
 if __name__ == "__main__":
     N = 20  # Define N as a constant
-    M = 20  # Define M as a constant
+    M = 100  # Define M as a constant
     D = 10  # Number of words to predict
-    temperature = 0.10
+    temperature = 1
     batch_size = 50  # Added batch size parameter

     print("Loading and encoding stories...")
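A likely reason for raising temperature from 0.10 to 1: since energies are normalized to [0, 1] before the softmax, T = 0.10 suppresses the worst candidate by a factor of exp(-1/0.1), roughly 4.5e-5, relative to the best, which is effectively greedy decoding, while T = 1 leaves a factor of exp(-1), roughly 0.37. A quick check:

import numpy as np
# Weight of the worst candidate (normalized energy 1) relative to the best (energy 0)
for T in (0.10, 1.0):
    print(T, np.exp(-1.0 / T))  # 0.1 -> ~4.5e-05 (near-greedy), 1.0 -> ~0.37
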
@@ -360,7 +375,7 @@ if __name__ == "__main__":
     # Get initial sequence from first story
     story_tokens = tokenize_with_punctuation(original_stories[0])
-    _, W = calculate_energy(sequences)
+    _, W, _ = calculate_energy(sequences)
     # Make sure we have enough tokens for M=100
     if len(story_tokens) >= M-1:
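Since calculate_energy now returns a 3-tuple, every call site has to unpack or discard the extra field term, as the "_, W, _" line above does. A sketch of the updated call pattern ("partial" stands in for a partial bit string and is hypothetical):

# Unpack the new 3-tuple return; only W is needed at this call site.
_, W, _ = calculate_energy(sequences, batch_size=batch_size)
# Retrieval should be given the same field strength the energies were built with:
# retrieve_sequences(sequences, partial, vocab, W, M=M, N=N,
#                    temperature=temperature, h=0.1)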