alireza
2025-02-22 18:27:43 +03:30
parent 8820b64c3e
commit f3f7fcda8a


@@ -217,16 +217,18 @@ def sequence_to_words(sequence, N=12):
     words = [''.join(bits[i:i + N]) for i in range(0, len(bits), N)]
     return words

-def calculate_energy(sequences, batch_size=32):
+def calculate_energy(sequences, batch_size=32, h=0.1):
     """
-    Calculate the energy of sequences using batched processing.
-    Returns energies and Hamiltonian matrix.
+    Calculate the energy of sequences using batched processing with magnetic field.
+    Returns energies and weight matrix W.
+    h: magnetic field strength
     """
     num_sequences = len(sequences)
     seq_length = sequences[0].shape[0]

-    # Initialize Hamiltonian matrix
-    hamiltonian = np.zeros((seq_length, seq_length))
+    # Initialize weight matrix and magnetic field
+    W = np.zeros((seq_length, seq_length))
+    h_field = h * np.ones(seq_length).reshape(-1, 1)  # Uniform magnetic field
     energies = []

     # Process sequences in batches
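For reference, the energy form this commit introduces is the standard Ising/Hopfield expression E(s) = -1/2 * s^T W s - h^T s. Below is a minimal standalone check of that formula; the values are toy ones (and use +/-1 spins for easy hand-checking, whereas the repo encodes words as 0/1 bit vectors), not the repo's data:

import numpy as np

# Toy check of the energy form introduced above: E = -1/2 * s^T W s - h^T s.
s = np.array([1, -1, 1, 1], dtype=float).reshape(-1, 1)
W = s @ s.T                              # Hebbian outer product of one stored pattern
h_field = 0.1 * np.ones_like(s)          # uniform field, h = 0.1

energy = -0.5 * (s.T @ W @ s).item() - (h_field.T @ s).item()
print(energy)  # s^T W s = (s^T s)^2 = 16 and sum(s) = 2, so E = -8 - 0.2 = -8.2
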
@@ -234,37 +236,42 @@ def calculate_energy(sequences, batch_size=32):
         batch = sequences[i:min(i + batch_size, num_sequences)]
         batch = np.array(batch)  # Convert batch to numpy array
-        # Calculate batch energies
-        batch_energies = np.sum(batch * batch.transpose(0, 2, 1), axis=(1, 2)) / -2
+        # Calculate batch contribution to weight matrix (Hebbian learning)
+        for seq in batch:
+            W += np.dot(seq, seq.T)
+        # Calculate batch energies including magnetic field
+        batch_energies = []
+        for seq in batch:
+            # E = -1/2 * s^T * W * s - h * sum(s)
+            # Properly extract scalar values from matrix multiplications
+            spin_spin_matrix = seq.T.dot(W).dot(seq)
+            spin_spin = -0.5 * float(spin_spin_matrix[0, 0])
+            magnetic_matrix = h_field.T.dot(seq)
+            magnetic = -float(magnetic_matrix[0, 0])
+            energy = spin_spin + magnetic
+            batch_energies.append(energy)
         energies.extend(batch_energies)
-        # Update Hamiltonian
-        batch_hamiltonian = np.sum(np.matmul(batch, batch.transpose(0, 2, 1)), axis=0)
-        hamiltonian += batch_hamiltonian
         # Free memory
         del batch
         del batch_energies
-        del batch_hamiltonian
-    # Normalize Hamiltonian
-    hamiltonian = hamiltonian / num_sequences
-    return np.array(energies), hamiltonian
+    # Normalize weight matrix
+    W = W / num_sequences
+    return np.array(energies), W, h_field
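Note that the new per-sequence energy loop is quadratic in sequence length and runs in pure Python. If it ever becomes a bottleneck, the same numbers can be computed in one einsum call. A hedged sketch under the diff's shape conventions (batch is (B, L, 1), W is (L, L), h_field is (L, 1)), with made-up toy data:

import numpy as np

# Vectorized equivalent of the per-sequence energy loop above (a sketch,
# not the committed code). Toy values below are invented.
B, L = 3, 8
rng = np.random.default_rng(0)
batch = rng.integers(0, 2, size=(B, L, 1)).astype(float)
W = np.zeros((L, L))
for seq in batch:                       # same Hebbian update as the diff
    W += np.dot(seq, seq.T)
h_field = 0.1 * np.ones((L, 1))

S = batch[:, :, 0]                      # flatten to shape (B, L)
spin_spin = -0.5 * np.einsum('bi,ij,bj->b', S, W, S)   # -1/2 * s^T W s per sequence
magnetic = -(S @ h_field).ravel()                      # -h^T s per sequence
batch_energies = spin_spin + magnetic   # one energy per sequence, no Python loop
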
-def retrieve_sequences(sequences, partial_sequence, vocab, W, M=10, N=12, temperature=1.0):
+def retrieve_sequences(sequences, partial_sequence, vocab, W, M=10, N=12, temperature=1.0, h=0.1):
     """
-    Retrieve the most likely next word using Ising Hamiltonian with temperature.
-    Uses associative memory to retrieve the last word of the sequence.
+    Retrieve the most likely next word using Ising Hamiltonian with magnetic field.
     """
     # Convert partial sequence to vector
     partial_vec = np.array([int(bit) for bit in partial_sequence]).reshape(-1, 1)
     # Get all possible words from vocabulary
     possible_words = list(vocab.values())
-    # Calculate weights matrix (Hebbian learning)
-    # Calculate energies for all possible words
+    # Create magnetic field
+    h_field = h * np.ones(M * N).reshape(-1, 1)
+    # Calculate energies for all possible completions
     word_energies = []
     for word in possible_words:
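For context on the scoring loop that follows: each candidate completion is the already-known partial bits concatenated with one vocabulary word's N-bit code, flattened into an M*N column vector. A toy sketch of that assembly (M, N, and the bit strings are invented for illustration):

import numpy as np

# Toy sketch of the completion assembled in the loop below.
M, N = 2, 4
partial_sequence = '1010'            # (M-1) words of N bits already known
word = '0110'                        # one candidate word code from vocab
complete_sequence = partial_sequence + word
complete_vec = np.array([int(bit) for bit in complete_sequence]).reshape(M * N, 1)
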
@@ -273,29 +280,36 @@ def retrieve_sequences(sequences, partial_sequence, vocab, W, M=10, N=12, temper
         if len(complete_sequence) == M*N:  # Ensure correct length
             complete_vec = np.array([int(bit) for bit in complete_sequence]).reshape(M * N, 1)
-            # Calculate energy using Ising Hamiltonian
-            energy_matrix = complete_vec.T.dot(W).dot(complete_vec)
-            energy = float(energy_matrix[0, 0])
-            word_energies.append((word, energy))
+            # Calculate energy with both interaction and magnetic field terms
+            spin_spin = 0
+            for seq in sequences:
+                # Properly extract scalar from matrix multiplication
+                overlap_matrix = complete_vec.T.dot(seq)
+                overlap = overlap_matrix[0, 0]  # Extract single scalar value
+                spin_spin -= overlap * overlap
+            # Extract scalar from magnetic field contribution
+            magnetic_matrix = h_field.T.dot(complete_vec)
+            magnetic = -float(magnetic_matrix[0, 0])
+            total_energy = spin_spin + magnetic
+            word_energies.append((word, total_energy))
     # Sort by energy
     word_energies.sort(key=lambda x: x[1])
-    # Normalize energies to prevent overflow
+    # Normalize energies
     energies = np.array([e[1] for e in word_energies])
-    energies = energies - np.min(energies)  # Shift to make minimum energy 0
-    energies = energies / np.max(energies) if np.max(energies) > 0 else energies  # Scale to [0,1]
+    energies = energies - np.min(energies)
+    max_energy = np.max(energies)
+    if max_energy > 0:
+        energies = energies / max_energy
-    # Calculate probabilities with normalized energies
+    # Calculate probabilities with Boltzmann distribution
     probabilities = np.exp(-energies/temperature)
     probabilities = probabilities / np.sum(probabilities)
+    # Check for valid probabilities
+    if np.any(np.isnan(probabilities)):
+        # Fallback to uniform distribution if numerical issues occur
+        probabilities = np.ones(len(word_energies)) / len(word_energies)
     # Sample from distribution
     selected_idx = np.random.choice(len(word_energies), p=probabilities)
     best_word, min_energy = word_energies[selected_idx]
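The shift/scale/softmax/fallback logic above is self-contained enough to factor into a helper. A sketch of the same sampling rule as a standalone function (the helper itself is hypothetical, not part of this commit):

import numpy as np

def sample_boltzmann(energies, temperature=1.0, rng=None):
    """Sample an index with probability proportional to exp(-E/T).

    Mirrors the normalization and fallback in retrieve_sequences above.
    """
    rng = rng or np.random.default_rng()
    e = np.asarray(energies, dtype=float)
    e = e - e.min()                      # shift minimum energy to 0
    if e.max() > 0:
        e = e / e.max()                  # scale to [0, 1] to tame exp()
    p = np.exp(-e / temperature)
    p = p / p.sum()
    if np.any(np.isnan(p)):              # numerical fallback: uniform distribution
        p = np.ones_like(e) / len(e)
    return rng.choice(len(e), p=p)

idx = sample_boltzmann([-8.2, -7.9, -5.0], temperature=1.0)
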
@@ -340,9 +354,10 @@ def predict_sequence(initial_sequence, vocab, sequences, W, D=10, M=100, N=12, t
 if __name__ == "__main__":
     N = 20  # Define N as a constant
-    M = 20  # Define M as a constant
+    M = 100  # Define M as a constant
     D = 10  # Number of words to predict
-    temperature = 0.10
+    temperature = 1
     batch_size = 50  # Added batch size parameter

     print("Loading and encoding stories...")
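A likely reason for raising temperature from 0.10 to 1: since energies are normalized to [0, 1] before the softmax, T = 0.10 suppresses the worst candidate by a factor of exp(-1/0.1), roughly 4.5e-5, relative to the best, which is effectively greedy decoding, while T = 1 leaves a factor of exp(-1), roughly 0.37. A quick check:

import numpy as np
# Weight of the worst candidate (normalized energy 1) relative to the best (energy 0)
for T in (0.10, 1.0):
    print(T, np.exp(-1.0 / T))  # 0.1 -> ~4.5e-05 (near-greedy), 1.0 -> ~0.37
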
@@ -360,7 +375,7 @@ if __name__ == "__main__":
     # Get initial sequence from first story
     story_tokens = tokenize_with_punctuation(original_stories[0])
-    _, W = calculate_energy(sequences)
+    _, W, _ = calculate_energy(sequences)
     # Make sure we have enough tokens for M=100
     if len(story_tokens) >= M-1:
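Since calculate_energy now returns a 3-tuple, every call site has to unpack or discard the extra field term, as the "_, W, _" line above does. A sketch of the updated call pattern ("partial" stands in for a partial bit string and is hypothetical):

# Unpack the new 3-tuple return; only W is needed at this call site.
_, W, _ = calculate_energy(sequences, batch_size=batch_size)
# Retrieval should be given the same field strength the energies were built with:
# retrieve_sequences(sequences, partial, vocab, W, M=M, N=N,
#                    temperature=temperature, h=0.1)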