Source code for casm.learn.direct_selection

from __future__ import (absolute_import, division, print_function,
                        unicode_literals)
from builtins import *

import copy
from casm.learn import empty_individual
from casm.learn.model_selection import cross_val_score
from casm.learn.fit import open_halloffame, make_fitting_data, make_estimator, \
  add_individual_detail, print_halloffame


def make_population(n_features, input_options):
    """
    Read direct selection population options and use them to construct a
    population of individuals to be fit.

    Arguments
    ---------

      n_features: int
        The size of the individuals to be constructed

      input_options: iterable of dict
        Each dict contains options specifying an individual or individuals to
        be included in the population. Three forms are recognized, checked in
        this order:

        bitstring:
          Ex.: {"bitstring" : "01110001100..."}
          Every non-zero digit switches the feature at that position 'on'.
          The string may have fewer than n_features digits; the rest are left
          'off'. A non-zero digit at position >= n_features is an error.

        indices:
          Ex.: {"indices" : [1, 2, 3, 7, 8]}
          Specify which features should be 'on' by index.

        from_halloffame:
          Ex.: {"from_halloffame" : "my_halloffame.pkl", "individuals" : [0, 2, 5]}
          Specifies a hall of fame and particular individuals (by index) in
          the hall of fame to include in the population. "individuals" is
          optional, with the default behaviour including all individuals in
          the hall of fame.

    Returns
    -------

      population: List[individuals]
        A list of the individuals to be fit, in the order the options were
        given

    Raises
    ------

      Exception
        If a "bitstring" or "indices" option selects no features, or if an
        option contains none of the recognized keys.

      IndexError
        If a "bitstring" or "indices" option switches on a feature at a
        position >= n_features.

    """
    population = []
    for opt in input_options:

        if "bitstring" in opt:
            on_indices = [
                index for index, bit in enumerate(opt["bitstring"]) if int(bit)
            ]
            population.append(
                _individual_from_indices(n_features, on_indices, "bitstring"))

        elif "indices" in opt:
            population.append(
                _individual_from_indices(n_features, list(opt["indices"]),
                                         "indices"))

        elif "from_halloffame" in opt:
            hall = open_halloffame(opt["from_halloffame"])
            if "individuals" in opt:
                population.extend(hall[index] for index in opt["individuals"])
            else:
                population.extend(hall)

        else:
            print("Unrecognized option:\n")
            print(opt)
            raise Exception(
                "Error making DirectSelection population: unrecognized option")

    return population


def _individual_from_indices(n_features, on_indices, source):
    """
    Return an individual of size n_features with the features listed in
    on_indices switched 'on'. `source` names the option form ("bitstring" or
    "indices") so that error messages point at the right input.
    """
    # Validate before constructing anything, so bad input fails fast and the
    # message names the option form that was actually used.
    if not on_indices:
        raise Exception(
            "Error using making individual from {0}: No basis functions selected"
            .format(source))

    highest = max(on_indices)
    if highest >= n_features:
        # NOTE(review): presumably the individual is list-like, in which case
        # the assignment below would raise a bare IndexError anyway; raising
        # the same type here keeps callers' handlers working -- confirm.
        raise IndexError(
            "Error making individual from {0}: feature index {1} is out of "
            "range for {2} features".format(source, highest, n_features))

    indiv = empty_individual(n_features)
    for index in on_indices:
        indiv[index] = True
    return indiv


def direct_fit(input, save=True, verbose=True, read_existing=True, hall=None):
    """
    Fit ECI and add details for a set of individuals specified for
    feature_selection method 'DirectSelection' via the 'population' kwarg.

    Arguments
    ---------

      input: dict
        The input settings as a dict. The population options are read from
        input["feature_selection"]["kwargs"]["population"], and the optional
        flag input["feature_selection"]["kwargs"]["use_saved_estimator"]
        (default False) is read from the same kwargs dict.

      save: boolean, optional, default=True
        Save a pickle file containing the training data and scoring metric.
        The file name, which can be specified by input["fit_data_filename"],
        defaults to "fit_data.pkl".

      verbose: boolean, optional, default=True
        Print information to stdout.

      read_existing: boolean, optional, default=True
        If it exists, read the pickle file containing the training data and
        scoring metric. The file name, which can be specified by
        input["fit_data_filename"], defaults to "fit_data.pkl".

      hall: deap.tools.HallOfFame, optional, default=None
        A Hall Of Fame to add resulting individuals to

    Returns
    -------

      population: List[individuals]
        A list of the individuals that were fit

    """
    # construct FittingData; forward the caller's 'save' and 'read_existing'
    # choices instead of always forcing them on
    fdata = make_fitting_data(input, save=save, verbose=verbose,
                              read_existing=read_existing)

    kwargs = input["feature_selection"]["kwargs"]
    population = make_population(fdata.n_features, kwargs["population"])

    # check for 'use_saved_estimator'; it is the same for every individual
    use_saved_estimator = kwargs.get("use_saved_estimator", False)

    for indiv_i, indiv in enumerate(population):
        if verbose:
            print("Begin fitting individual", indiv_i, "of", len(population))

        # each individual gets its own copy of the settings, so swapping in a
        # saved estimator below never leaks into the caller's input
        _input = copy.deepcopy(input)
        if use_saved_estimator and getattr(indiv, "input", None) is not None:
            _input["estimator"] = indiv.input["estimator"]

        estimator = make_estimator(_input)

        indiv.fitness.values = cross_val_score(estimator,
                                               fdata.weighted_X,
                                               indiv,
                                               y=fdata.weighted_y,
                                               scoring=fdata.scoring,
                                               cv=fdata.cv,
                                               penalty=fdata.penalty)
        add_individual_detail(indiv, estimator, fdata, _input, selector=None)
        if verbose:
            print(" DONE\n")

    # NOTE(review): the nesting here was reconstructed from a flattened line;
    # confirm that print_halloffame is meant to be gated by 'verbose'.
    if verbose:
        print("Result:")
        print_halloffame(population)

    if hall is not None:
        hall.update(population)

    return population