Source code for aind_dynamic_foraging_models.generative_model.act_functions

"""Functions for action selection in generative models"""

from typing import Optional

import numpy as np
from aind_behavior_gym.dynamic_foraging.task import L, R
from scipy.stats import norm


[docs] def act_softmax( q_value_t: np.array, softmax_inverse_temperature: float, bias_terms: np.array, choice_kernel_relative_weight=None, choice_kernel=None, rng=None, ): """Given q values and softmax_inverse_temperature, return the choice and choice probability. If chocie_kernel is not None, it will sum it into the softmax function like this 1. Compute adjusted Q values by adding bias terms and choice kernel :math:`Q' = \\beta * (Q + w_{ck} * choice\\_kernel) + bias` :math:`\\beta` ~ softmax_inverse_temperature :math:`w_{ck}` ~ choice_kernel_relative_weight 2. Compute choice probabilities by softmax function :math:`choice\\_prob = exp(Q'_i) / \\sum_i(exp(Q'_i))` Parameters ---------- q_value_t : list or np.array array of q values, by default 0 softmax_inverse_temperature : int, optional inverse temperature of softmax function, by default 0 bias_terms : np.array, optional _description_, by default 0 choice_kernel_relative_weight : _type_, optional relative strength of choice kernel relative to Q in decision, by default None. If not none, choice kernel will have an inverse temperature of softmax_inverse_temperature * choice_kernel_relative_weight choice_kernel : _type_, optional _description_, by default None rng : _type_, optional random number generator, by default None Returns ------- _type_ _description_ """ # -- Compute adjusted Q value -- # Note that the bias term is outside the temperature term to make it comparable across # different softmax_inverse_temperatures. # Also, I switched to inverse_temperature from temperature to make # the fittings more numerically stable. adjusted_Q = softmax_inverse_temperature * q_value_t + bias_terms if choice_kernel is not None: adjusted_Q += softmax_inverse_temperature * choice_kernel_relative_weight * choice_kernel # -- Compute choice probabilities -- choice_prob = softmax(adjusted_Q, rng=rng) # -- Choose action -- choice = choose_ps(choice_prob, rng=rng) return choice, choice_prob
[docs] def act_epsilon_greedy( q_value_t: np.array, epsilon: float, bias_terms: np.array, choice_kernel=None, choice_kernel_relative_weight=None, rng=None, ): """Action selection by epsilon-greedy method. Steps: 1. Compute adjusted Q values by adding bias terms and choice kernel Q' = Q + bias + choice_kernel_relative_weight * choice_kernel 2. The espilon-greedy method is quivalent to choice probabilities: If Q'_L != Q'_R (for simplicity, we assume only two choices) choice_prob [(argmax(Q')] = 1 - epsilon / 2 choice_prob [(argmin(Q'))] = epsilon / 2 else choice_prob [:] = 0.5 Parameters ---------- q_value_t : np.array Current Q-values epsilon : float Probability of exploration bias_terms : np.array Bias terms choice_kernel : None or np.array, optional If not None, it will be added to Q-values, by default None choice_kernel_relative_weight : _type_, optional If not None, it controls the relative weight of choice kernel, by default None rng : _type_, optional _description_, by default None """ rng = rng or np.random.default_rng() # -- Compute adjusted Q value -- adjusted_Q = q_value_t + bias_terms if choice_kernel is not None: adjusted_Q += choice_kernel_relative_weight * choice_kernel # -- Compute choice probabilities -- if adjusted_Q[0] == adjusted_Q[1]: choice_prob = np.array([0.5, 0.5]) else: argmax_Q = np.argmax(adjusted_Q) choice_prob = np.array([epsilon / 2, epsilon / 2]) choice_prob[argmax_Q] = 1 - epsilon / 2 # -- Choose action -- choice = choose_ps(choice_prob, rng=rng) return choice, choice_prob
[docs] def act_loss_counting( previous_choice: Optional[int], loss_count: int, loss_count_threshold_mean: float, loss_count_threshold_std: float, bias_terms: np.array, choice_kernel=None, choice_kernel_relative_weight=None, rng=None, ): """Action selection by loss counting method. Parameters ---------- previous_choice : int Last choice loss_count : int Current loss count loss_count_threshold_mean : float Mean of the loss count threshold loss_count_threshold_std : float Standard deviation of the loss count threshold bias_terms: np.array Bias terms loss count choice_kernel : None or np.array, optional If not None, it will be added to Q-values, by default None choice_kernel_relative_weight : _type_, optional If not None, it controls the relative weight of choice kernel, by default None rng : _type_, optional """ rng = rng or np.random.default_rng() # -- Return random if this is the first trial -- if previous_choice is None: choice_prob = np.array([0.5, 0.5]) return choose_ps(choice_prob, rng=rng), choice_prob # -- Compute probability of switching -- # This cdf trick is equivalent to: # 1) sample a threshold from the normal distribution # 2) compare the threshold with the loss count prob_switch = norm.cdf( loss_count, loss_count_threshold_mean - 1e-10, # To make sure this is equivalent to ">=" if the threshold is an integer loss_count_threshold_std + 1e-16, # To make sure this cdf trick works for std=0 ) choice_prob = np.array([prob_switch, prob_switch]) # Assuming only two choices choice_prob[int(previous_choice)] = 1 - prob_switch # -- Add choice kernel -- # For a fair comparison with other models that have choice kernel. # However, choice kernel of different families are not directly comparable. # Here, I first compute a normalized choice probability for choice kernel alone using softmax # with inverse temperature 1.0, compute a bias introduced by the choice kernel, and then add # it to the original choice probability. if choice_kernel is not None: choice_prob_choice_kernel = softmax(choice_kernel, rng=rng) bias_L_from_choice_kernel = ( choice_prob_choice_kernel[L] - 0.5 ) * choice_kernel_relative_weight # A biasL term introduced by the choice kernel choice_prob[L] += bias_L_from_choice_kernel # -- Add global bias -- # For a fair comparison with other models that have bias terms. # However, bias terms of different families are not directly comparable. # Here, the bias term is added to the choice probability directly, whereas in other models, # the bias term is added to the Q-values. choice_prob[L] += bias_terms[L] # -- Re-normalize choice probability -- choice_prob[L] = np.clip(choice_prob[L], 0, 1) choice_prob[R] = 1 - choice_prob[L] return choose_ps(choice_prob, rng=rng), choice_prob
# --- Helper functions ---
[docs] def softmax(x, rng=None): """ Parameters ---------- x : _type_ _description_ rng : _type_, optional _description_, by default None Returns ------- _type_ _description_ """ rng = rng or np.random.default_rng() if np.max(x) > 700: # To prevent explosion of EXP argmax_x = np.argmax(rng.permutation(x)) # Randomly choose one of the max values greedy = np.zeros(len(x)) greedy[argmax_x] = 1 return greedy else: # Normal softmax return np.exp(x) / np.sum(np.exp(x))
[docs] def choose_ps(ps, rng=None): """ "Poisson"-choice process """ rng = rng or np.random.default_rng() ps = ps / np.sum(ps) return np.max(np.argwhere(np.hstack([-1e-16, np.cumsum(ps)]) < rng.random()))