13 namespace Import_impl {
17 using Data = std::tuple<fs::path, jsonParser, std::string, std::pair<bool, double> >;
50 (
"pos,p", po::value<std::vector<fs::path> >(&
m_pos_vec)->multitoken()->value_name(
ArgHandler::path()),
"Path(s) to structure(s) being imported (multiple allowed, but no wild-card matching)")
52 "Adjusts cost function for mapping optimization (cost=w*lattice_deformation+(1-w)*basis_deformation).")
53 (
"min-energy",
"Resolve mapping conflicts based on energy rather than deformation.")
54 (
"max-vol-change", po::value<double>(&
m_vol_tolerance)->default_value(0.25),
55 "Adjusts range of SCEL volumes searched while mapping imported structure onto ideal crystal (only necessary if the presence of vacancies makes the volume ambiguous). Default is +/- 25% of relaxed_vol/prim_vol. Smaller values yield faster import, larger values may yield more accurate mapping.")
56 (
"max-va-frac", po::value<double>(&
m_max_va_frac)->default_value(0.5),
57 "Places upper bound on the fraction of sites that are allowed to be vacant after imported structure is mapped onto the ideal crystal. Smaller values yield faster execution, larger values may yield more accurate mapping. Has no effect if supercell volume can be inferred from the number of atoms in the structure. Default value allows up to 50% of sites to be vacant.")
58 (
"min-va-frac", po::value<double>(&
m_min_va_frac)->default_value(0.),
59 "Places lower bound on the fraction of sites that are allowed to be vacant after imported structure is mapped onto the ideal crystal. Nonzero values may yield faster execution if updating configurations that are known to have a large number of vacancies, at potential sacrifice of mapping accuracy. Has no effect if supercell volume can be inferred from the number of atoms in the structure. Default value allows as few as 0% of sites to be vacant.")
60 (
"batch,b", po::value<fs::path>(&
m_batch_path)->value_name(
ArgHandler::path()),
"Path to batch file, which should list one structure file path per line (can be used in combination with --pos)")
61 (
"rotate,r",
"Rotate structure to be consistent with setting of PRIM")
62 (
"ideal,i",
"Assume imported structures are unstrained (ideal) for faster importing. Can be slower if used on deformed structures, in which case more robust methods will be used")
64 (
"data,d",
"Attempt to extract calculation data (properties.calc.json file) from the enclosing directory of the structure files, if it is available")
65 (
"copy-additional-files",
"Recursively copy other files from the same directory as the properties.calc.json file.");
101 double lattice_weight;
102 std::vector<fs::path> pos_paths;
105 po::variables_map vm;
111 po::store(po::parse_command_line(args.
argc, args.
argv, import_opt.
desc()), vm);
115 if(vm.count(
"help")) {
116 args.
log << std::endl;
117 args.
log << import_opt.
desc() << std::endl;
122 if(vm.count(
"desc")) {
124 args.
log << import_opt.
desc() << std::endl;
126 args.
log <<
"DESCRIPTION" << std::endl;
127 args.
log <<
" Import structure specified by --pos. If it doesn't exist make a directory for it and copy data over" << std::endl;
128 args.
log <<
" If a *.json file is specified, it will be interpreted as a 'calc.properties.json' file." << std::endl;
137 pos_paths = import_opt.
pos_vec();
141 catch(po::error &e) {
143 args.
err_log <<
"ERROR: " << e.what() << std::endl << std::endl;
146 catch(std::exception &e) {
148 args.
err_log <<
"ERROR: " << e.what() << std::endl << std::endl;
153 if(!vm.count(
"pos") && !vm.count(
"batch")) {
155 args.
err_log <<
"No structures specified for import (specify structures using --pos or --batch)." << std::endl;
160 if(vm.count(
"batch")) {
161 if(!fs::exists(batch_path)) {
162 args.
err_log <<
"ERROR: Batch import file does not exist at " << batch_path <<
"\n";
165 fs::ifstream batchfile(batch_path);
167 while(batchfile >> tpath) {
168 pos_paths.push_back(tpath);
171 if(tpath != pos_paths.back() && tpath.string().size() != 0 && fs::exists(tpath))
172 pos_paths.push_back(tpath);
175 if(pos_paths.size() == 0) {
176 args.
err_log <<
"ERROR: No files specified for import.\n";
177 if(vm.count(
"batch"))
178 args.
err_log <<
" Check batch file for errors.\n";
181 bool missing_files(
false);
182 for(
auto it = pos_paths.cbegin(); it != pos_paths.cend(); ++it) {
183 if(!fs::exists(*it)) {
185 args.
err_log <<
"*** ERROR: Missing file(s):\n";
186 missing_files =
true;
187 args.
err_log <<
" " << fs::absolute(*it) <<
"\n";
205 std::unique_ptr<PrimClex> uniq_primclex;
213 ConfigMapper configmapper(primclex, lattice_weight, vol_tol, map_opt, tol);
221 std::map<Configuration *, std::vector<Import_impl::Data> > import_map;
222 std::vector<std::string > error_log;
225 args.
log <<
" Beginning import of " << pos_paths.size() <<
" configuration" << (pos_paths.size() > 1 ?
"s" :
"") <<
"...\n" << std::endl;
226 for(
auto it = pos_paths.begin(); it != pos_paths.end(); ++it) {
227 if(it != pos_paths.begin())
228 args.
log <<
"\n***************************\n" << std::endl;
232 pos_path = fs::absolute(*it, pwd);
233 std::string imported_name;
238 if(vm.count(
"data")) {
240 if(!dft_path.empty()) {
247 std::pair<bool, double> checkenergy(
false, 0.0);
250 if(pos_path.extension() ==
".json" || pos_path.extension() ==
".JSON") {
252 if(datajson.
contains(
"relaxed_energy")) {
253 checkenergy = std::pair<bool, double>(
true, datajson[
"relaxed_energy"].
get<
double>());
258 fs::ifstream struc_stream(pos_path);
259 import_struc.
read(struc_stream);
263 std::vector<Index> best_assignment;
266 args.
log <<
" " << pos_path <<
"\n was imported successfully as " << imported_name << std::endl << std::endl;
270 args.
log <<
" " << pos_path <<
"\n mapped onto pre-existing equivalent structure " << imported_name << std::endl << std::endl;
272 relax_data = fullrelax_data[
"best_mapping"];
273 args.
log <<
" Relaxation stats -> lattice_deformation = " << relax_data[
"lattice_deformation"].
get<
double>()
274 <<
" basis_deformation = " << relax_data[
"basis_deformation"].get<double>() << std::endl << std::endl;;
276 catch(std::exception &e) {
277 args.
err_log <<
" ERROR: Unable to import " << pos_path <<
" because \n"
278 <<
" -> " << e.what() <<
"\n\n";
279 error_log.push_back(it->string() +
"\n -> " + e.what());
280 if(it != pos_paths.cend()) {
281 args.
log <<
" Continuing...\n";
293 if(!vm.count(
"data"))
296 std::stringstream contcar_ss;
298 import_map[&imported_config].push_back(
Import_impl::Data(pos_path, relax_data, contcar_ss.str(), checkenergy));
303 std::stringstream conflict_log;
304 if(vm.count(
"data")) {
305 args.
log <<
" Attempting to import data..." << std::endl;
306 auto it(import_map.begin()), end_it(import_map.end());
307 for(; it != end_it; ++it) {
309 std::vector<Import_impl::Data> &data_vec(it->second);
312 bool preexisting(
false);
316 Index mult = data_vec.size() +
Index(preexisting);
317 double best_weight(1e19);
318 double best_energy(1e19);
319 Index best_conflict(0), best_ind(0);
321 conflict_log <<
" CONFLICT -> " << mult <<
" matching structures for config " << imported_config.
name() <<
": " << std::endl;
322 double w = lattice_weight;
323 Index conflict_ind(1);
326 conflict_log <<
" 1) Pre-existing data for " << imported_config.
name() <<
" before import." << std::endl
327 <<
" Relaxation stats:" << std::endl;
331 conflict_log <<
" -- lattice_deformation = " << ld <<
"; basis_deformation = " << bd <<
"; weighted avg = " << w *ld + (1.0 - w)*bd << std::endl;
332 if(!vm.count(
"min-energy")) {
333 best_weight = w * ld + (1.0 - w) * bd;
339 conflict_log <<
" -- lattice_deformation = unknown; basis_deformation = unknown; weighted avg = unknown" << std::endl;
342 conflict_log <<
" -- relaxed_energy = " << imported_config.
calc_properties()[
"relaxed_energy"].
get<
double>() << std::endl;
343 if(vm.count(
"min-energy")) {
350 conflict_log <<
" -- relaxed_energy = unknown" << std::endl;
351 conflict_log << std::endl;
353 for(
Index i = 0; i < data_vec.size(); i++) {
354 fs::path pos_path = std::get<Import_impl::path>(data_vec[i]);
355 conflict_log <<
" " << conflict_ind++ <<
") Structure imported from " << pos_path <<
"." << std::endl
356 <<
" Relaxation stats:" << std::endl;
358 double ld = relaxjson[
"lattice_deformation"].
get<
double>();
359 double bd = relaxjson[
"basis_deformation"].
get<
double>();
360 conflict_log <<
" -- lattice_deformation = " << ld <<
"; basis_deformation = " << bd <<
"; weighted avg = " << w *ld + (1.0 - w)*bd << std::endl;
361 if(std::get<Import_impl::energy>(data_vec[i]).first)
362 conflict_log <<
" -- relaxed_energy = " << std::get<Import_impl::energy>(data_vec[i]).second << std::endl;
364 conflict_log <<
" -- relaxed_energy = unknown" << std::endl;
365 conflict_log << std::endl;
366 if(vm.count(
"min-energy")) {
367 if(std::get<Import_impl::energy>(data_vec[i]).first) {
368 if(std::get<Import_impl::energy>(data_vec[i]).second < best_energy) {
369 best_energy = std::get<Import_impl::energy>(data_vec[i]).second;
370 best_conflict = conflict_ind - 1;
376 if(w * ld + (1.0 - w)*bd < best_weight) {
377 best_weight = w * ld + (1.0 - w) * bd;
378 best_conflict = conflict_ind - 1;
384 conflict_log <<
" ==> Resolution: No data will be imported since data already exists" << std::endl;
386 if(!vm.count(
"min-energy"))
387 conflict_log <<
" *** WARNING: Conflicting config #" << best_conflict <<
" maps more closely onto ideal crystal! ***" << std::endl;
389 conflict_log <<
" *** WARNING: Conflicting config #" << best_conflict <<
" has a lower energy! ***" << std::endl;
393 if(!vm.count(
"min-energy"))
394 conflict_log <<
" ==> Resolution: Import data from closest match, structure #" << best_conflict << std::endl;
396 conflict_log <<
" ==> Resolution: Import data from lowest energy config, structure #" << best_conflict << std::endl;
398 conflict_log <<
"\n ----------------------------------------------\n" << std::endl;
404 fs::path pos_path = std::get<Import_impl::path>(data_vec[best_ind]);
405 if(pos_path.extension() !=
".json" && pos_path.extension() !=
".JSON") {
406 args.
log <<
" No calculation data was found in the enclosing directory of \n"
407 <<
" " << pos_path << std::endl
408 <<
" Continuing..." << std::endl;
414 _cp_files(pos_path, imported_config, vm.count(
"copy-additional-files"), args.
log);
420 fs::ofstream contcar_out(import_target /
"relaxed_structure.vasp");
421 contcar_out << std::get<Import_impl::contcar>(data_vec[best_ind]);
426 args.
log <<
" WARNING: Some properties from " << pos_path <<
" were not valid. Viable values will still be recorded.\n";
431 auto jit = relaxjson.
cbegin(), jit_end = relaxjson.
cend();
432 for(; jit != jit_end; ++jit) {
433 calc_data[jit.name()] = *jit;
441 args.
log <<
"\n***************************\n" << std::endl;
443 args.
log <<
" Finished importing " << pos_paths.size() <<
" structures";
445 args.
log <<
" (none of these are new or unique)";
446 else if(n_unique < pos_paths.size())
447 args.
log <<
" (only " << n_unique <<
" of these " << (n_unique == 1 ?
"is" :
"are") <<
" new and unique)";
448 args.
log <<
"." << std::endl;
451 args.
log <<
" Writing SCEL..." << std::endl;
453 args.
log <<
" Writing config_list..." << std::endl << std::endl;
455 args.
log <<
" DONE" << std::endl << std::endl;
457 if(error_log.size() > 0) {
458 args.
log <<
" WARNING: --The following paths could not be imported due to errors:\n";
459 for(
auto it = error_log.cbegin(); it != error_log.cend(); ++it) {
461 <<
"\n ----------------------------------------------\n" << std::endl;
463 args.
log <<
"\n" << std::endl;
465 if(conflict_log.str().size()) {
466 args.
log <<
" WARNING: -- The following conflicts were found\n" << std::endl
467 << conflict_log.str() << std::endl;
469 args.
log <<
" Please review these conflicts. A different resolution can be obtained by removing datafiles from\n"
470 <<
" the training_data directory and performing an import using a manually reduced set of files.\n";
472 args.
log <<
" DONE" << std::endl << std::endl;
474 args.
log << std::endl;
483 return std::distance(fs::directory_iterator(p), fs::directory_iterator());
498 if(pos_path.extension() ==
".json" || pos_path.extension() ==
".JSON") {
505 dft_path.remove_filename();
507 if(fs::exists(dft_path)) {
515 dft_path.remove_filename();
516 dft_path /=
"properties.calc.json";
517 if(fs::exists(dft_path)) {
537 fs::create_directories(p);
541 if(calc_props_path.empty()) {
545 log.
custom(std::string(
"Copy calculation files: ") + config.
name());
546 if(!copy_additional_files) {
547 log <<
"cp " << calc_props_path <<
" " << p << std::endl;
548 fs::copy_file(calc_props_path, p / calc_props_path.filename());
557 auto it = fs::directory_iterator(from_dir);
558 auto end = fs::directory_iterator();
559 for(; it != end; ++it) {
560 if(fs::is_regular_file(*it)) {
561 log <<
"cp " << *it <<
" " << to_dir << std::endl;
562 fs::copy_file(*it, to_dir / it->path().filename());
565 fs::path new_to_dir = to_dir / it->path().filename();
566 fs::create_directories(new_to_dir);
Data structure holding basic CASM command info.
void initialize() override
Fill in the options descriptions accordingly.
std::vector< fs::path > m_pos_vec
void write_config_list(std::set< std::string > scel_to_delete={})
double lattice_weight() const
void from_json(ClexDescription &desc, const jsonParser &json)
bool _has_existing_files(fs::path p)
PrimClex & get_primclex() const
Get the PrimClex for this Configuration.
void add_help_suboption()
Add a plain --help suboption.
void push_back_source(const jsonParser &source)
const std::vector< fs::path > & pos_vec() const
double max_va_frac() const
const_iterator cend() const
Returns const_iterator to end of JSON object or JSON array.
static std::string path()
Get value_type string for path completion.
fs::path calc_dir() const
T get(Args...args) const
Get data from json, using one of several alternatives.
bool read_calc_properties(jsonParser &parsed_props) const
void print(std::ostream &sout)
Print POSCAR to stream.
void custom(const std::string &what)
const Properties & calc_properties() const
int import_command(const CommandArgs &args)
const po::options_description & desc()
Get the program options, filled with the initialized values.
COORD_MODE specifies the current coordinate mode (Fractional or Cartesian)
fs::path get_pos_path() const
Path to various files.
void _cp_files(const fs::path &pos_path, const Configuration &config, bool copy_additional_files, Log &log)
Copy files in the same directory as properties.calc.json into the training_data directory for a partic...
void set_min_va_frac(double _min_va)
EigenIndex Index
For long integer indexing:
po::options_description m_desc
Boost program options. All the derived classes have them, but will fill them up themselves.
fs::path get_path() const
ProjectSettings & settings()
PrimClex is the top-level data structure for a CASM project.
double min_va_frac() const
T max(const T &A, const T &B)
SimpleJSonSiteStructure< true > simple_json(const BasicStructure< Site > &struc, const std::string &prefix)
std::string name() const
SCELV_A_B_C_D_E_F/i.
double vol_tolerance() const
virtual void read(std::istream &stream)
Print interpolated images in separate directories.
const Configuration & configuration(const std::string &configname) const
access configuration by name (of the form "scellname/[NUMBER]", e.g., ("SCEL1_1_1_1_0_0_0/0") ...
const fs::path & batch_path() const
void _recurs_cp_files(const fs::path &from_dir, const fs::path &to_dir, Log &log)
void print_supercells(std::set< std::string > scel_to_delete={}) const
void set_calc_properties(const jsonParser &json)
Read calculation results into the configuration.
void set_max_va_frac(double _max_va)
void write_pos() const
Write the POS file to get_pos_path.
PrimClex & make_primclex_if_not(const CommandArgs &args, std::unique_ptr< PrimClex > &uniq_primclex)
If !_primclex, construct new PrimClex stored in uniq_primclex, then return reference to existing or c...
Print POSCAR with formatting options.
bool contains(const std::string &name) const
Return true if JSON object contains 'name'.
fs::path _calc_properties_path(const PrimClex &primclex, fs::path pos_path)
Return path to properties.calc.json that will be imported checking a couple possible locations relati...
void error(const std::string &what)
std::tuple< fs::path, jsonParser, std::string, std::pair< bool, double > > Data
const_iterator cbegin() const
Returns const_iterator to beginning of JSON object or JSON array.
const ClexDescription & default_clex() const
A Configuration represents the values of all degrees of freedom in a Supercell.
bool valid_index(Index i)
bool import_structure_occupation(const fs::path &pos_path, std::string &imported_name, jsonParser &relaxation_properties, std::vector< Index > &best_assignment, Eigen::Matrix3d &cart_op) const
imports structure specified by 'pos_path' into primclex() by finding optimal mapping and then setting...