Module deepcomp.agent.brute_force
Expand source code
import logging
import numpy as np
from joblib import Parallel, delayed
from deepcomp.util.logs import config_logging
from deepcomp.agent.base import CentralAgent
class BruteForceAgent(CentralAgent):
"""
Brute force approach, testing all possible actions and choosing the best one.
Finds the optimal action per step but requires access to the env to test and evaluate each action.
Optimal in terms of the reward function of the central agent, eg, sum of UE utilities per step.
"""
def __init__(self, num_workers=1):
"""
:param num_workers: Number of jobs to run in parallel (should be < num cores).
Also >1 only makes sense for 3+ UEs and BS, otherwise overhead is higher than gain.
"""
super().__init__()
self.num_workers = num_workers
self.env = None
@staticmethod
def number_to_base(n, b, num_digits=None):
"""
Convert any decimal integer to a new number with any base.
Adjusted from: https://stackoverflow.com/a/28666223/2745116
:param n: Decimal integer
:param b: Base
:param num_digits: Number of digits to return
:return: List representing the new number. One list element per digit.
"""
# special case n=0
if n == 0:
if num_digits is None:
return [0]
else:
return [0 for _ in range(num_digits)]
# actual conversion
digits = []
while n:
digits.append(int(n % b))
n //= b
result = digits[::-1]
if num_digits is None:
return result
# pad with zeros to get the desired number of digits
assert num_digits >= len(result), "Num digits too small to represent converted number."
missing_digits = num_digits - len(result)
result = [0 for _ in range(missing_digits)] + result
assert len(result) == num_digits
return result
def get_ith_action(self, i):
"""Get the i-th action, when walking through the entire action space."""
# convert to number with base num_bs + 1, ie, actions selecting 0 (=noop) or one of the BS
action_list = self.number_to_base(i, self.env.num_bs + 1, num_digits=self.env.max_ues)
assert self.env.action_space.contains(action_list)
return action_list
def test_ith_action(self, i):
"""Test the i-th action and return the action and reward"""
# configure logging each time; necessary for parallel execution with joblib
config_logging()
self.env.set_log_level({'deepcomp.util.simulation': logging.DEBUG})
action_list = self.get_ith_action(i)
# need to test the action in dict form
action_dict = self.env.get_ue_actions(action_list)
rewards = self.env.test_ue_actions(action_dict)
reward = self.env.step_reward(rewards)
return action_list, reward
def compute_action(self, observation):
"""Test all actions and return the best one"""
assert self.env is not None, "Set agent's env before computing actions."
# parallelized version
zipped_results = Parallel(n_jobs=self.num_workers)(
delayed(self.test_ith_action)(i)
for i in range((self.env.num_bs + 1)**self.env.max_ues)
)
actions, rewards = map(list, zip(*zipped_results))
# get best action
best_idx = np.argmax(rewards)
best_action = actions[best_idx]
return best_action
Classes
class BruteForceAgent (num_workers=1)
-
Brute force approach, testing all possible actions and choosing the best one. Finds the optimal action per step but requires access to the env to test and evaluate each action. Optimal in terms of the reward function of the central agent, eg, sum of UE utilities per step.
:param num_workers: Number of jobs to run in parallel (should be < num cores). Also >1 only makes sense for 3+ UEs and BS, otherwise overhead is higher than gain.
Expand source code
class BruteForceAgent(CentralAgent): """ Brute force approach, testing all possible actions and choosing the best one. Finds the optimal action per step but requires access to the env to test and evaluate each action. Optimal in terms of the reward function of the central agent, eg, sum of UE utilities per step. """ def __init__(self, num_workers=1): """ :param num_workers: Number of jobs to run in parallel (should be < num cores). Also >1 only makes sense for 3+ UEs and BS, otherwise overhead is higher than gain. """ super().__init__() self.num_workers = num_workers self.env = None @staticmethod def number_to_base(n, b, num_digits=None): """ Convert any decimal integer to a new number with any base. Adjusted from: https://stackoverflow.com/a/28666223/2745116 :param n: Decimal integer :param b: Base :param num_digits: Number of digits to return :return: List representing the new number. One list element per digit. """ # special case n=0 if n == 0: if num_digits is None: return [0] else: return [0 for _ in range(num_digits)] # actual conversion digits = [] while n: digits.append(int(n % b)) n //= b result = digits[::-1] if num_digits is None: return result # pad with zeros to get the desired number of digits assert num_digits >= len(result), "Num digits too small to represent converted number." missing_digits = num_digits - len(result) result = [0 for _ in range(missing_digits)] + result assert len(result) == num_digits return result def get_ith_action(self, i): """Get the i-th action, when walking through the entire action space.""" # convert to number with base num_bs + 1, ie, actions selecting 0 (=noop) or one of the BS action_list = self.number_to_base(i, self.env.num_bs + 1, num_digits=self.env.max_ues) assert self.env.action_space.contains(action_list) return action_list def test_ith_action(self, i): """Test the i-th action and return the action and reward""" # configure logging each time; necessary for parallel execution with joblib config_logging() self.env.set_log_level({'deepcomp.util.simulation': logging.DEBUG}) action_list = self.get_ith_action(i) # need to test the action in dict form action_dict = self.env.get_ue_actions(action_list) rewards = self.env.test_ue_actions(action_dict) reward = self.env.step_reward(rewards) return action_list, reward def compute_action(self, observation): """Test all actions and return the best one""" assert self.env is not None, "Set agent's env before computing actions." # parallelized version zipped_results = Parallel(n_jobs=self.num_workers)( delayed(self.test_ith_action)(i) for i in range((self.env.num_bs + 1)**self.env.max_ues) ) actions, rewards = map(list, zip(*zipped_results)) # get best action best_idx = np.argmax(rewards) best_action = actions[best_idx] return best_action
Ancestors
Static methods
def number_to_base(n, b, num_digits=None)
-
Convert any decimal integer to a new number with any base. Adjusted from: https://stackoverflow.com/a/28666223/2745116
:param n: Decimal integer :param b: Base :param num_digits: Number of digits to return :return: List representing the new number. One list element per digit.
Expand source code
@staticmethod def number_to_base(n, b, num_digits=None): """ Convert any decimal integer to a new number with any base. Adjusted from: https://stackoverflow.com/a/28666223/2745116 :param n: Decimal integer :param b: Base :param num_digits: Number of digits to return :return: List representing the new number. One list element per digit. """ # special case n=0 if n == 0: if num_digits is None: return [0] else: return [0 for _ in range(num_digits)] # actual conversion digits = [] while n: digits.append(int(n % b)) n //= b result = digits[::-1] if num_digits is None: return result # pad with zeros to get the desired number of digits assert num_digits >= len(result), "Num digits too small to represent converted number." missing_digits = num_digits - len(result) result = [0 for _ in range(missing_digits)] + result assert len(result) == num_digits return result
Methods
def compute_action(self, observation)
-
Test all actions and return the best one
Expand source code
def compute_action(self, observation): """Test all actions and return the best one""" assert self.env is not None, "Set agent's env before computing actions." # parallelized version zipped_results = Parallel(n_jobs=self.num_workers)( delayed(self.test_ith_action)(i) for i in range((self.env.num_bs + 1)**self.env.max_ues) ) actions, rewards = map(list, zip(*zipped_results)) # get best action best_idx = np.argmax(rewards) best_action = actions[best_idx] return best_action
def get_ith_action(self, i)
-
Get the i-th action, when walking through the entire action space.
Expand source code
def get_ith_action(self, i): """Get the i-th action, when walking through the entire action space.""" # convert to number with base num_bs + 1, ie, actions selecting 0 (=noop) or one of the BS action_list = self.number_to_base(i, self.env.num_bs + 1, num_digits=self.env.max_ues) assert self.env.action_space.contains(action_list) return action_list
def test_ith_action(self, i)
-
Test the i-th action and return the action and reward
Expand source code
def test_ith_action(self, i): """Test the i-th action and return the action and reward""" # configure logging each time; necessary for parallel execution with joblib config_logging() self.env.set_log_level({'deepcomp.util.simulation': logging.DEBUG}) action_list = self.get_ith_action(i) # need to test the action in dict form action_dict = self.env.get_ue_actions(action_list) rewards = self.env.test_ue_actions(action_dict) reward = self.env.step_reward(rewards) return action_list, reward