Source code for casm.project.selection

from __future__ import (absolute_import, division, print_function, unicode_literals)
from builtins import *

# conda's current version of pandas raises these warnings, but they are safe
# see: https://stackoverflow.com/questions/40845304
import warnings
warnings.filterwarnings("ignore", message="numpy.dtype size changed")
warnings.filterwarnings("ignore", message="numpy.ufunc size changed")

import copy
from io import StringIO
import json
import os
import subprocess

import numpy as np
import pandas
import six

from casm.project.project import Project
from casm.project.query import query
from casm.misc import compat

[docs]class Selection(object):
    """
    A Selection object contains information about a CASM project

    Attributes
    ----------

      proj: casm.Project, optional, default=Project containing the current working directory
        the CASM project the selection belongs to

      path: string, optional, default="MASTER"
        path to selection file, or "MASTER" (Default="MASTER")

      all: bool, optional, default=True
        if True, self.data will include all configurations, whether selected or
        not. If False, only selected configurations will be included.

      data: pandas.DataFrame
        A pandas.DataFrame describing the selected configurations. Has at least
        'configname' and 'selected' (as bool) columns.

    """
[docs]    def __init__(self, proj=None, path="MASTER", all=True):
        """
        Construct a CASM Project representation.

        Arguments
        ---------

          proj: casm.Project, optional, default=Project containing the current working directory
            the CASM project the selection belongs to

          path: string, optional, default="MASTER"
            path to selection file, or "MASTER" (Default="MASTER")

          all: bool, optional, default=True
            if True, self.data will include all configurations, whether selected or
            not. If False, only selected configurations will be included.


        """
        if proj == None:
          proj = Project()
        elif not isinstance(proj, Project):
          raise Exception("Error constructing Selection: proj argument is not a CASM project")
        self.proj = proj

        self.path = path
        if os.path.isfile(path):
          self.path = os.path.abspath(path)

        self.all = all

        self._data = None

        # reserved for use by casm.plotting
        self.src = None


    @property
    def data(self):
        """
        Get Selection data as a pandas.DataFrame

        If the data is modified, 'save' must be called for CASM to use the modified selection.
        """
        if self._data is None:
          if self.path in ["MASTER", "ALL", "CALCULATED"]:
            self._data = query(self.proj, ['configname', 'selected'], self, all=self.all)
          elif self._is_json():
            self._data = pandas.read_json(self.path, 'r', orient='records')
          else:
            with open(self.path, compat.pandas_rmode()) as f:
                if compat.peek(f) == '#':
                    f.read(1)
                self._data = pandas.read_csv(f, sep=compat.str(' +'), engine='python')

          self._clean_data()

          if not self.all:
            self._data = self._data[self._data['selected']==True]

        return self._data


[docs]    def save(self, data=None, force=False):
        """
        Save the current selection. Also allows completely replacing the 'data'
        describing the selected configurations.

        Args:
          data: None (default), or pandas.DataFrame describing the Selection with
            'configname' and 'selected' columns. If path=="MASTER", Configurations
            not included in 'data' will be set to not selected.
          force: Boolean, force overwrite existing files
        """
        if self.path == "MASTER":

          if data is not None:
            self._data = data.copy()
            self._clean_data()

          if self._data is None:
            return

          clist = self.proj.dir.config_list()
          backup = clist + ".tmp"
          if os.path.exists(backup):
            raise Exception("File: " + backup + " already exists")

          # read
          with open(clist, 'rb') as f:
              j = json.loads(f.read().decode('utf-8'))

          for sk, sv in six.iteritems(j["supercells"]):
            for ck, cv in six.iteritems(sv):
              sv[ck]["selected"] = False

          # set selection
          for index, row in self._data.iterrows():
            scelname, configid = row["configname"].split('/')
            j["supercells"][scelname][configid]["selected"] = row["selected"]

          # write
          with open(backup, 'wb') as f:
              f.write(six.u(json.dumps(j, indent=2)).encode('utf-8'))
          os.rename(backup, clist)

          # refresh proj config list
          self.proj.refresh(read_configs=True)

        elif self.path in ["ALL", "CALCULATED"]:
          raise Exception("Cannot save the '" + self.path + "' Selection")

        else:

          if data is not None:
            self._data = data.copy()
            self._clean_data()

          if os.path.exists(self.path) and not force:
            raise Exception("File: " + self.path + " already exists")

          backup = self.path + ".tmp"
          if os.path.exists(backup):
            raise Exception("File: " + backup + " already exists")

          if self._is_json():
            self._data.to_json(backup, orient='records')
          else:
            self.data.loc[:,"selected"] = self.data.loc[:,"selected"].astype(np.int_)
            with open(backup, compat.pandas_wmode()) as f:
                f.write('# ')  # will make this optional in a future version
                self._data.to_csv(f, sep=compat.str(' '), index=False)
          os.rename(backup, self.path)


[docs]    def saveas(self, path, force=False):
        """
        Create a new Selection from this one, save and return it

        Args:
          path: path to selection file (Default="MASTER")
          force: Boolean, force overwrite existing files

        Returns:
          sel: the new Selection created from this one
        """
        sel = Selection(self.proj, path, all=self.all)
        sel._data = self.data.copy()
        sel.save(force=force)
        return sel


    def _is_json(self):
        return self.path[-5:].lower() == ".json"


    def _clean_data(self):
        self._data.loc[:,'selected'] = self._data.loc[:,'selected'].astype(bool)


[docs]    def query(self, columns, force=False, verbose=False):
        """
        Query requested columns and store them in 'data'. Will not overwrite
        columns that already exist, unless 'force'==True.

        Will query data for all configurations, whether selected or not, if
        self.all == True.
        """

        if force == False:
          _col = [x for x in columns if x not in self.data.columns]
        else:
          _col = columns

        if verbose:
          print("# Query requested:", columns)
          if force == False:
            print("# Use existing:", [x for x in columns if x in self.data.columns])
          else:
            print("# Overwrite existing:", [x for x in columns if x in self.data.columns])
          if len(_col) == 0:
            print("# No query necessary")
          else:
            print("# Querying:", _col)

        if len(_col) == 0:
          return

        df = query(self.proj, _col, self, all=self.all)

        if verbose:
          print("#   DONE\n")

        msg = "querying different numbers of records: {0}, {1}".format(
          self.data.shape, df.shape)
        assert self.data.shape[0] == df.shape[0], msg

        for c in df.columns:
          self.data.loc[:,c] = df.loc[:,c].values


[docs]    def write_pos(self, all=False):
        """
        Write POS file for configurations

        Arguments
        ---------

          all: bool, optional, default=False
            if True, will write POS file for all configurations in the selection
            whether selected or not. If False, only write POS file for selected
            configurations.
        """
        self.proj.command("query -c " + self.path + " --write-pos")


[docs]    def add_data(self, name, data=None, force=False):
        """
        Equivalent to:
        if name not in sel.data.columns or force == True:
          if data is None:
            sel.query([name], force)
          else:
            sel.data.loc[:,name] = data
        """
        if name not in self.data.columns or force == True:
          if data is None:
            self.query([name], force)
          else:
            self.data.loc[:,name] = data