# Source code for casm.project.selection

from __future__ import (absolute_import, division, print_function, unicode_literals)
from builtins import *

# conda's current version of pandas raises these warnings, but they are safe
# see: https://stackoverflow.com/questions/40845304
import warnings
warnings.filterwarnings("ignore", message="numpy.dtype size changed")
warnings.filterwarnings("ignore", message="numpy.ufunc size changed")

import copy
from io import StringIO
import json
import os
import subprocess

import numpy as np
import pandas
import six

from casm.project.project import Project
from casm.project.query import query
from casm.misc import compat

class Selection(object):
    """
    A Selection object represents a selection of configurations in a CASM
    project

    Attributes
    ----------

      proj: casm.Project
        the CASM project the selection belongs to

      path: string
        path to the selection file (stored as an absolute path when the file
        exists at construction time), or one of the standard selection names
        "MASTER", "ALL", "CALCULATED"

      all: bool
        if True, self.data will include all configurations, whether selected
        or not. If False, only selected configurations will be included.

      data: pandas.DataFrame
        A pandas.DataFrame describing the selected configurations. Has at
        least 'configname' and 'selected' (as bool) columns. Loaded lazily on
        first access.

      src:
        reserved for use by casm.plotting

    """

    def __init__(self, proj=None, path="MASTER", all=True):
        """
        Construct a representation of a CASM selection.

        Arguments
        ---------

          proj: casm.Project, optional, default=Project containing the current working directory
            the CASM project the selection belongs to

          path: string, optional, default="MASTER"
            path to selection file, or "MASTER" (Default="MASTER")

          all: bool, optional, default=True
            if True, self.data will include all configurations, whether
            selected or not. If False, only selected configurations will be
            included.

        Raises
        ------
          Exception: if 'proj' is given but is not a casm Project

        """
        if proj is None:
            proj = Project()
        elif not isinstance(proj, Project):
            raise Exception(
                "Error constructing Selection: proj argument is not a CASM project")
        self.proj = proj

        # Keep special names ("MASTER", ...) and not-yet-existing files as
        # given; only existing files are normalized to an absolute path.
        self.path = path
        if os.path.isfile(path):
            self.path = os.path.abspath(path)

        self.all = all

        # Loaded lazily by the 'data' property
        self._data = None

        # reserved for use by casm.plotting
        self.src = None

    @property
    def data(self):
        """
        Get Selection data as a pandas.DataFrame

        The data is read on first access: via a CASM query for the standard
        selections ("MASTER", "ALL", "CALCULATED"), otherwise from the JSON or
        whitespace-separated file at self.path.

        If the data is modified, 'save' must be called for CASM to use the
        modified selection.
        """
        if self._data is None:
            if self.path in ["MASTER", "ALL", "CALCULATED"]:
                self._data = query(self.proj, ['configname', 'selected'], self,
                                   all=self.all)
            elif self._is_json():
                # Only the path is positional: the second positional slot of
                # read_json is 'orient', so passing a file mode there
                # conflicts with orient='records' and raises TypeError.
                self._data = pandas.read_json(self.path, orient='records')
            else:
                with open(self.path, compat.pandas_rmode()) as f:
                    # Skip the leading '#' that 'save' writes before the
                    # header line
                    if compat.peek(f) == '#':
                        f.read(1)
                    self._data = pandas.read_csv(f, sep=compat.str(' +'),
                                                 engine='python')

            self._clean_data()

            if not self.all:
                self._data = self._data[self._data['selected'] == True]

        return self._data

    def save(self, data=None, force=False):
        """
        Save the current selection. Also allows completely replacing the 'data'
        describing the selected configurations.

        Args:
          data: None (default), or pandas.DataFrame describing the Selection
             with 'configname' and 'selected' columns. If path=="MASTER",
             Configurations not included in 'data' will be set to not selected.
          force: Boolean, force overwrite existing files

        Raises:
          Exception: if the selection is "ALL" or "CALCULATED", if the
             destination file exists and 'force' is False, or if the
             temporary '<destination>.tmp' file already exists
        """
        if self.path == "MASTER":

            if data is not None:
                self._data = data.copy()
                self._clean_data()

            # Nothing was ever loaded or provided, so there is nothing to save
            if self._data is None:
                return

            clist = self.proj.dir.config_list()
            backup = clist + ".tmp"
            if os.path.exists(backup):
                raise Exception("File: " + backup + " already exists")

            # read
            with open(clist, 'rb') as f:
                j = json.loads(f.read().decode('utf-8'))

            # deselect everything, so configurations missing from the data
            # end up not selected
            for scel in six.itervalues(j["supercells"]):
                for config in six.itervalues(scel):
                    config["selected"] = False

            # set selection
            for index, row in self._data.iterrows():
                scelname, configid = row["configname"].split('/')
                # bool() guarantees a JSON-serializable native value
                j["supercells"][scelname][configid]["selected"] = bool(row["selected"])

            # write to a temporary file first, then move it into place
            with open(backup, 'wb') as f:
                f.write(six.u(json.dumps(j, indent=2)).encode('utf-8'))
            os.rename(backup, clist)

            # refresh proj config list
            self.proj.refresh(read_configs=True)

        elif self.path in ["ALL", "CALCULATED"]:
            raise Exception("Cannot save the '" + self.path + "' Selection")

        else:

            if data is not None:
                self._data = data.copy()
                self._clean_data()

            if os.path.exists(self.path) and not force:
                raise Exception("File: " + self.path + " already exists")

            backup = self.path + ".tmp"
            if os.path.exists(backup):
                raise Exception("File: " + backup + " already exists")

            if self._is_json():
                self._data.to_json(backup, orient='records')
            else:
                # 'selected' is written as 0/1. Convert a copy so the
                # in-memory data keeps its boolean 'selected' column.
                out = self.data.copy()
                out["selected"] = out["selected"].astype(np.int_)
                with open(backup, compat.pandas_wmode()) as f:
                    f.write('# ')  # will make this optional in a future version
                    out.to_csv(f, sep=compat.str(' '), index=False)
            os.rename(backup, self.path)

    def saveas(self, path, force=False):
        """
        Create a new Selection from this one, save and return it

        Args:
          path: path to selection file (Default="MASTER")
          force: Boolean, force overwrite existing files

        Returns:
          sel: the new Selection created from this one
        """
        sel = Selection(self.proj, path, all=self.all)
        sel._data = self.data.copy()
        sel.save(force=force)
        return sel

    def _is_json(self):
        """Return True if self.path ends with '.json' (case-insensitive)."""
        return self.path.lower().endswith(".json")

    def _clean_data(self):
        """Ensure the 'selected' column of the data has boolean dtype."""
        # Plain column assignment replaces the column (and its dtype);
        # '.loc[:, col] = ...' may instead try to keep the existing dtype.
        self._data['selected'] = self._data['selected'].astype(bool)

    def query(self, columns, force=False, verbose=False):
        """
        Query requested columns and store them in 'data'. Will not overwrite
        columns that already exist, unless 'force'==True.

        Will query data for all configurations, whether selected or not, if
        self.all == True.

        Args:
          columns: list of names of the columns to query
          force: Boolean, if True re-query and overwrite existing columns
          verbose: Boolean, if True print what is being queried
        """
        frame = self.data
        existing = [x for x in columns if x in frame.columns]

        if not force:
            _col = [x for x in columns if x not in frame.columns]
        else:
            _col = columns

        if verbose:
            print("# Query requested:", columns)
            if not force:
                print("# Use existing:", existing)
            else:
                print("# Overwrite existing:", existing)
            if len(_col) == 0:
                print("# No query necessary")
            else:
                print("# Querying:", _col)

        if len(_col) == 0:
            return

        df = query(self.proj, _col, self, all=self.all)

        if verbose:
            print("# DONE\n")

        msg = "querying different numbers of records: {0}, {1}".format(
            frame.shape, df.shape)
        assert frame.shape[0] == df.shape[0], msg

        # Copy by position (raw values), not by index label
        for c in df.columns:
            frame[c] = df[c].values

    def write_pos(self, all=False):
        """
        Write POS file for configurations

        Arguments
        ---------

          all: bool, optional, default=False
            if True, will write POS file for all configurations in the
            selection whether selected or not. If False, only write POS file
            for selected configurations.

        """
        # NOTE(review): 'all' is accepted but is not forwarded to the command
        # below, so the same command is run for all=True and all=False.
        # Confirm the intended 'casm query' option before wiring it in.
        self.proj.command("query -c " + self.path + " --write-pos")

    def add_data(self, name, data=None, force=False):
        """
        Add a column to 'data', either from provided values or by querying.

        Equivalent to:
          if name not in sel.data.columns or force == True:
            if data is None:
              sel.query([name], force)
            else:
              sel.data.loc[:,name] = data

        Args:
          name: name of the column to add
          data: None (default) to query CASM for 'name', else the values to
             store in the column
          force: Boolean, if True overwrite an existing column
        """
        frame = self.data
        if name not in frame.columns or force:
            if data is None:
                self.query([name], force)
            else:
                frame[name] = data