Files
cimai/spin_glass.py
alireza 8940a7d7f9 first
2025-02-19 13:45:07 +03:30

242 lines
9.2 KiB
Python

import numpy as np
from itertools import product
import re
class SpinGlassHamiltonian:
    """Fully connected spin-glass (Hopfield-style) Hamiltonian whose pairwise
    couplings J_ij are learned, via the Hebbian outer-product rule, from ±1
    bit patterns derived from words in a training text.

    Each stored pattern is an (n_words x word_bits) grid: one row per word,
    each row a ±1 encoding of a simple character-sum hash of that word.
    """

    def __init__(self, n_words=4, word_bits=8, text_file='sentences.txt',
                 seed=None, max_patterns=5):
        """
        Initialize a fully connected spin glass Hamiltonian with J_ij couplings
        constructed from text patterns.

        Args:
            n_words (int): Number of words per pattern.
            word_bits (int): Number of bits to encode each word.
            text_file (str): Path to file containing training sentences.
            seed (int): Random seed for reproducibility.
            max_patterns (int): Maximum number of patterns to store
                (was previously hard-coded to 5; default preserves behavior).

        Raises:
            ValueError: If the text contains fewer than ``n_words`` words,
                so no pattern can be built (previously this produced a
                silent NaN/inf coupling matrix from division by zero).
        """
        if seed is not None:
            np.random.seed(seed)
        self.n_words = n_words
        self.word_bits = word_bits
        self.n_spins = n_words * word_bits

        # Load training text; fall back to a built-in sample if missing.
        try:
            with open(text_file, 'r') as f:
                text = f.read()
        except FileNotFoundError:
            print(f"Warning: {text_file} not found. Using default text.")
            text = """
            The quantum spin glass model shows fascinating behavior
            in statistical physics and complex systems research
            many body interactions lead to frustration effects
            ground state properties reveal emergent phenomena
            """

        # Generate patterns from text.
        self.patterns, self.words = self.text_to_patterns(
            text, n_words, word_bits, max_patterns)
        if len(self.patterns) == 0:
            raise ValueError(
                f"training text has fewer than n_words={n_words} words; "
                "cannot build any pattern")

        # Hebbian rule: J = (1/P) * sum_mu outer(xi_mu, xi_mu), zero diagonal.
        self.J = np.zeros((self.n_spins, self.n_spins))
        for pattern in self.patterns:
            flat = pattern.flatten()
            self.J += np.outer(flat, flat)
        self.J /= len(self.patterns)
        np.fill_diagonal(self.J, 0)  # no self-interactions

        print("Words used to construct patterns:")
        for i, words in enumerate(self.words):
            print(f"\nPattern {i} words: {words}")
            print(f"Pattern {i} configuration ({self.n_words}x{self.word_bits}):")
            print(self.patterns[i])

    @staticmethod
    def _encode_word(word, word_bits):
        """Encode *word* as a length-``word_bits`` ±1 row via the low bits
        of a character-sum hash (collision-prone but deterministic)."""
        word_hash = sum(ord(c) for c in word)
        return np.array(
            [1.0 if (word_hash >> col) & 1 else -1.0 for col in range(word_bits)])

    def text_to_patterns(self, text, n_words, word_bits, max_patterns=5):
        """Convert text to patterns where each row encodes a full word.

        Slides a window one word at a time over the text, producing up to
        ``max_patterns`` (n_words x word_bits) ±1 grids.

        Returns:
            tuple: (numpy array of patterns, list of word groups).
        """
        words = re.findall(r'\b\w+\b', text.lower())
        patterns = []
        pattern_words = []
        for i in range(len(words) - n_words + 1):
            word_group = words[i:i + n_words]
            pattern_words.append(word_group)
            patterns.append(
                np.stack([self._encode_word(w, word_bits) for w in word_group]))
            if len(patterns) >= max_patterns:
                break
        return np.array(patterns), pattern_words

    def calculate_energy(self, state):
        """
        Calculate energy using only 2-point interactions: E = -1/2 s·J·s.

        Args:
            state (numpy.array): Array of +1/-1 spins.

        Returns:
            float: Energy of the configuration.
        """
        # Quadratic form avoids materializing the n_spins x n_spins outer product.
        return float(-0.5 * (state @ self.J @ state))

    def state_to_2d(self, state):
        """Convert 1D state array to 2D grid (n_words x word_bits)."""
        return state.reshape(self.n_words, self.word_bits)

    def state_to_words(self, state_2d):
        """Analyze a 2D state pattern to find closest matching words from patterns.

        For each row, reports the training word whose encoded row has the
        largest absolute normalized overlap with that row.
        """
        closest_words = []
        for row in state_2d:
            max_overlap = -1.0
            best_word = None
            for pattern_idx, pattern in enumerate(self.patterns):
                for word_idx, pattern_row in enumerate(pattern):
                    overlap = abs(np.sum(row * pattern_row) / self.word_bits)
                    if overlap > max_overlap:
                        max_overlap = overlap
                        best_word = self.words[pattern_idx][word_idx]
            closest_words.append(f"{best_word} (overlap: {max_overlap:.2f})")
        return closest_words
def generate_states(n_spins):
    """Lazily yield every ±1 spin configuration of length *n_spins*
    (2**n_spins arrays in total), one numpy array at a time."""
    yield from map(np.array, product((-1, 1), repeat=n_spins))
def main():
    """Build a 24x16 spin-glass system, greedily optimize one word-row at a
    time by exhaustive search over its 2**word_bits settings, then use the
    learned couplings to extend a best-matching pattern into a 30-word
    sentence."""
    n_words = 24
    word_bits = 16
    sg = SpinGlassHamiltonian(n_words=n_words, word_bits=word_bits, seed=42)

    # Random ±1 starting configuration.
    current_state = np.random.choice([-1, 1], size=n_words * word_bits)
    current_energy = sg.calculate_energy(current_state)

    # Coordinate descent over words: fix all other words, exhaustively try
    # every bit assignment for the current word, keep the lowest-energy one.
    for word_idx in range(n_words):
        print(f"\nOptimizing word {word_idx + 1}...")
        best_energy = current_energy
        best_state = current_state.copy()
        start_idx = word_idx * word_bits
        end_idx = start_idx + word_bits
        for candidate in product([-1, 1], repeat=word_bits):
            trial = current_state.copy()
            trial[start_idx:end_idx] = candidate
            trial_energy = sg.calculate_energy(trial)
            if trial_energy < best_energy:
                best_energy, best_state = trial_energy, trial.copy()
        current_state, current_energy = best_state, best_energy

        # Report progress after each word.
        state_2d = sg.state_to_2d(current_state)
        print(f"Current energy: {current_energy:.4f}")
        print("Current state:")
        print(state_2d)
        print("Current words:")
        for i, word_info in enumerate(sg.state_to_words(state_2d)):
            print(f"Word {i+1}: {word_info}")

    # Store and report the final result.
    state_energies = [(current_state, current_energy)]
    print(f"\nSpin Glass System with {n_words}x{word_bits} lattice")
    print("\nOptimized state:")
    state, energy = state_energies[0]
    state_2d = sg.state_to_2d(state)

    # Absolute normalized overlap with each stored pattern.
    overlaps = [
        f"P{p}: {abs(np.sum(state_2d * pattern) / (n_words*word_bits)):.2f}"
        for p, pattern in enumerate(sg.patterns)
    ]
    print(f"\nEnergy: {energy:.4f}")
    print(f"State configuration:\n{state_2d}")
    print("\nGenerated words:")
    for row_idx, word_info in enumerate(sg.state_to_words(state_2d)):
        print(f"Word {row_idx + 1}: {word_info}")
    print(f"Absolute overlaps with patterns: {', '.join(overlaps)}")

    # Identify the most similar training sentence (parse overlap back out
    # of the formatted strings, as before).
    best_pattern_idx = np.argmax([float(o.split(': ')[1]) for o in overlaps])
    print(f"Most similar to sentence: {' '.join(sg.words[best_pattern_idx])}")

    # Autoregressive generation: seed with the pattern's first n_words-1
    # words, then repeatedly optimize the last row given the context rows.
    print("\nGenerating 30-word sentence:")
    sentence = list(sg.words[best_pattern_idx][:-1])
    while len(sentence) < 30:
        context_state = np.zeros((n_words, word_bits))
        for row, word in enumerate(sentence[-n_words+1:]):
            word_hash = sum(ord(c) for c in word)
            for col in range(word_bits):
                context_state[row, col] = 1 if (word_hash >> col) & 1 else -1

        best_energy = float('inf')
        best_word_state = None
        for candidate in product([-1, 1], repeat=word_bits):
            trial = context_state.copy()
            trial[-1] = candidate
            trial_energy = sg.calculate_energy(trial.flatten())
            if trial_energy < best_energy:
                best_energy, best_word_state = trial_energy, trial.copy()

        # Decode the optimized last row back to its closest training word.
        decoded = sg.state_to_words(best_word_state)
        sentence.append(decoded[-1].split(" (")[0])
    print(" ".join(sentence))
if __name__ == "__main__":
    main()