CASM  1.1.0
A Clusters Approach to Statistical Mechanics
Import_impl.hh
Go to the documentation of this file.
1 #ifndef CASM_DB_Import_impl
2 #define CASM_DB_Import_impl
3 
4 #include <boost/filesystem/fstream.hpp>
5 
7 #include "casm/app/import.hh"
10 #include "casm/database/Import.hh"
13 
14 namespace CASM {
15 namespace DB {
16 
21 template <typename OutputIterator>
22 std::pair<OutputIterator, int> construct_pos_paths(
23  const PrimClex &primclex, const Completer::ImportOption &import_opt,
24  OutputIterator result) {
25  const po::variables_map &vm = import_opt.vm();
26  Index count = 0;
27  bool missing_files(false);
28 
29  // count files, check if existing, print warning messages for missing files
30  auto lambda = [&](const fs::path &p) {
31  ++count;
32  if (!fs::exists(p)) {
33  if (!missing_files) {
34  err_log() << "*** ERROR: Missing file(s):\n";
35  }
36  missing_files = true;
37  err_log() << " " << fs::absolute(p) << "\n";
38  }
39  *result++ = fs::absolute(p);
40  return true;
41  };
42 
43  // read all --pos paths
44  std::for_each(import_opt.pos_vec().begin(), import_opt.pos_vec().end(),
45  lambda);
46 
47  // read all the paths from a batch file
48  if (vm.count("batch")) {
49  if (!fs::exists(import_opt.batch_path())) {
50  err_log() << "ERROR: Batch import file does not exist at "
51  << import_opt.batch_path() << "\n";
52  return std::make_pair(result, ERR_MISSING_INPUT_FILE);
53  }
54 
55  fs::ifstream batchfile(import_opt.batch_path());
56  fs::path tpath;
57  while (batchfile >> tpath) {
58  lambda(tpath);
59  ++count;
60  batchfile.ignore(std::numeric_limits<std::streamsize>::max(), '\n');
61  }
62  if (tpath.string().size() != 0 && fs::exists(tpath)) {
63  lambda(tpath);
64  }
65  }
66 
67  if (count == 0) {
68  err_log() << "ERROR: No files specified for import.\n";
69  if (vm.count("batch")) {
70  err_log() << " Check batch file for errors.\n";
71  }
72  return std::make_pair(result, ERR_INVALID_INPUT_FILE);
73  }
74 
75  if (missing_files) {
76  return std::make_pair(result, ERR_INVALID_INPUT_FILE);
77  }
78 
79  return std::make_pair(result, 0);
80 }
81 
82 // --- ImportT ---
83 
// ImportT constructor: the visible initializers store `mapper`, `_set` and
// `report_dir` in m_structure_mapper, m_set and m_report_dir; the body is
// empty.
// NOTE(review): listing lines 86 and 90 are absent from this extract (the
// numbering jumps 85 -> 87 and 89 -> 91). Line 86 is the start of the
// signature taking `const PrimClex &primclex`; line 90 is presumably the first
// member/base initializer -- restore both from the original header.
85 template <typename _ConfigType>
87  const StructureMap<ConfigType> &mapper,
88  ImportSettings const &_set,
89  std::string report_dir)
91  m_structure_mapper(mapper),
92  m_set(_set),
93  m_report_dir(report_dir) {}
94 
95 template <typename _ConfigType>
96 template <typename PathIterator>
97 void ImportT<_ConfigType>::import(PathIterator begin, PathIterator end) {
98  // vector of Mapping results, may be >1 per input if primitive and
99  // non-primitive
100  std::vector<ConfigIO::Result> results;
101 
102  // map of data import results
103  // 'configname' -> 'preexisting?
104  std::map<std::string, bool> preexisting;
105 
106  auto const &project_settings = this->primclex().settings();
107  auto calctype = project_settings.default_clex().calctype;
108  auto required_properties =
109  project_settings.required_properties(traits<ConfigType>::name, calctype);
110 
111  Log &log = CASM::log();
112  auto it = begin;
113  for (; it != end; ++it) {
114  log << "Importing " << resolve_struc_path(it->string(), primclex())
115  << std::endl;
116  ;
117 
118  std::vector<ConfigIO::Result> tvec;
119 
120  // Outputs one or more mapping results from the structure located at specied
121  // path
122  // See _import documentation for more.
123  m_structure_mapper.map(resolve_struc_path(it->string(), primclex()),
124  required_properties, nullptr,
125  std::back_inserter(tvec));
126  for (auto &res : tvec) {
127  // reasons to import data or not:
128  //
129  // could not map || no data || !m_import_data:
130  // do not import
131  // !has_existing_data_or_files:
132  // import
133  // has_existing_data_or_files:
134  // new_data:
135  // better_score:
136  // import
137  // !better_score:
138  // do not import
139  // !new_data:
140  // better_score:
141  // overwrite:
142  // import
143  // !overwrite:
144  // do not import
145  // !better_score:
146  // do not import
147  // if could not map, no data, or do not import data, continue
148  if (!res.properties.to.empty() && res.has_data && settings().import) {
149  // std::cout << "res.properties.to: " << res.properties.to << ";
150  // res.has_data: "
151  //<< res.has_data << " settings().import: " << settings.import << "\n";
152  // we will try to import data
153 
154  // note if preexisting data before this batch
155  auto p_it = preexisting.find(res.properties.to);
156  if (p_it == preexisting.end()) {
157  p_it = preexisting
158  .emplace(res.properties.to,
159  has_existing_data_or_files(res.properties.to))
160  .first;
161  }
162  res.import_data.preexisting = p_it->second;
163 
164  // insert properties
165  db_props().insert(res.properties);
166  }
167 
168  results.push_back(res);
169  }
170  }
171 
172  // Copy files as needed/requested
173  this->_copy_files(results);
174 
175  this->_import_report(results);
176 
177  db_supercell().commit();
178  db_config<ConfigType>().commit();
179  db_props().commit();
180 }
181 
182 // *********************************************************************************
183 
184 template <typename _ConfigType>
186  std::vector<ConfigIO::Result> &results) const {
187  if (!settings().import ||
188  !(settings().copy_files || settings().additional_files))
189  return;
190 
191  for (auto res : results) {
192  std::string to_config = res.properties.to;
193  if (to_config.empty()) continue;
194 
195  auto db_it = db_props().find_via_to(to_config);
196  std::string origin = db_it->origin;
197 
198  if (origin != res.properties.origin) continue;
199 
200  // note that this import to configuration is best in case of conflicts
201  res.import_data.is_best = true;
202 
203  // if preexisting data, do not import new data unless overwrite option set
204  // if last_insert is not empty, it means the existing data was from this
205  // batch and we can overwrite
206  if (res.import_data.preexisting && !settings().overwrite) {
207  continue;
208  }
209 
210  db_props().erase(db_it);
211  // copy files:
212  // there might be existing files in cases of import conflicts
213  // cp_files() will update res.properties.file_data
214  rm_files(to_config, false);
215  this->cp_files(res, false, settings().additional_files);
216  db_props().insert(res.properties);
217  }
218 }
219 
220 // *********************************************************************************
221 
// Classify the import results and write report files into m_report_dir.
//
// Each result is placed in one or more of these lists:
//   - map_fail:         `properties.to` is empty
//   - map_success:      `properties.to` is not empty
//   - import_data_fail: mapped, has data, importing is enabled, properties are
//                       already stored for the configuration, and
//                       score(properties) < best_score(properties.to)
//   - conflict:         more than one result shares the same `properties.to`
// For every non-empty list a message is written to log() and a file named
// prefix + "_fail" / "_success" / "_data_fail" / "_conflict" is written.
//
// NOTE(review): listing lines 223, 244, 285 and 286 are absent from this
// extract (the numbering jumps). Presumably they hold the first line of the
// signature, a statement that completes `prefix`, and the declaration/setup of
// `dict` used below -- restore them from the original header.
222 template <typename _ConfigType>
224  std::vector<ConfigIO::Result> &results) const {
225  // map_fail: could not map
226  // map_success: could map
227  // import_data_fail: would import but couldn't (score < best_score &&
228  // !data_results.count(from)) import_data_conflicts: conflicts with other in
229  // import batch && preexisting
230  // - pos, config, score_method, chosen?, overwrite?, import data?, import
231  // additional files?, score, best_score, is_preexisting?
232 
233  // list of structures that could not be mapped
234  std::vector<ConfigIO::Result> map_fail;
235 
236  // list of structures that could be mapped
237  std::vector<ConfigIO::Result> map_success;
238 
239  // list of structures that would be imported except preexisting data prevents
240  // it
241  std::vector<ConfigIO::Result> import_data_fail;
242 
243  std::string prefix = "import_";
245 
  // number of results that mapped to each configuration name
246  std::map<std::string, int> all_to;
247 
  // first pass: sort every result into map_fail / map_success /
  // import_data_fail and count how many results share each 'to' name
248  for (auto const &res : results) {
249  if (res.properties.to.empty()) {
250  map_fail.push_back(res);
251  } else {
252  auto it = all_to.find(res.properties.to);
253  if (it == all_to.end()) {
254  it = all_to.insert(std::make_pair(res.properties.to, 0)).first;
255  }
256  ++(it->second);
257 
258  map_success.push_back(res);
259  if (res.has_data && settings().import &&
260  db_props().find_via_to(res.properties.to) != db_props().end() &&
261  db_props().score(res.properties) <
262  db_props().best_score(res.properties.to)) {
263  import_data_fail.push_back(res);
264  }
265  }
266  }
267 
268  // list of conflicts (multiple config with same 'to')
269  std::vector<ConfigIO::Result> conflict;
270  for (auto const &res : results) {
271  if (!res.properties.to.empty() && all_to[res.properties.to] > 1) {
272  conflict.push_back(res);
273  }
274  }
275 
276  // output a 'batch' file with paths to structures that could not be imported
277  if (map_fail.size()) {
278  fs::path p = fs::path(m_report_dir) / (prefix + "_fail");
279  fs::ofstream sout(p);
280 
281  log() << "WARNING: Could not import " << map_fail.size() << " structures."
282  << std::endl;
283  log() << " See detailed report: " << p << std::endl << std::endl;
284 
  // the failure report only has the columns "initial_path" and "fail_msg";
  // this `formatter` is local to the block and distinct from the one below
287  auto formatter = dict.parse({"initial_path", "fail_msg"});
288  sout << formatter(map_fail.begin(), map_fail.end());
289  }
290 
291  // - pos, config, score_method, import data?, import additional files?, score,
292  // best_score, is_preexisting?
  // the remaining three reports share the formatter from _import_formatter()
293  auto formatter = _import_formatter();
294 
295  if (map_success.size()) {
296  fs::path p = fs::path(m_report_dir) / (prefix + "_success");
297  fs::ofstream sout(p);
298 
299  log() << "Successfully imported " << map_success.size() << " structures."
300  << std::endl;
301  log() << " See detailed report: " << p << std::endl << std::endl;
302 
303  sout << formatter(map_success.begin(), map_success.end());
304  }
305 
306  if (import_data_fail.size()) {
307  fs::path p = fs::path(m_report_dir) / (prefix + "_data_fail");
308  fs::ofstream sout(p);
309 
310  log() << "WARNING: Did not import data from " << import_data_fail.size()
311  << " structures which have are a mapping score"
312  " better than the existing data."
313  << std::endl;
314  log() << " You may wish to inspect these structures and allow overwriting "
315  "or remove existing data manually."
316  << std::endl;
317  log() << " See detailed report: " << p << std::endl << std::endl;
318 
319  sout << formatter(import_data_fail.begin(), import_data_fail.end());
320  }
321 
322  if (conflict.size()) {
323  fs::path p = fs::path(m_report_dir) / (prefix + "_conflict");
324  fs::ofstream sout(p);
325 
326  log()
327  << "WARNING: Imported data from structures that mapped to the same "
328  "configuration."
329  << std::endl
330  << " Data can only be imported from one of the conflicting structures."
331  << std::endl
332  << " Based on the current conflict resolution method the 'best' "
333  "result was automatically chosen, "
334  << std::endl
335  << " but you may wish to inspect these results and manually select "
336  "which structures to import."
337  << std::endl;
338  log() << " See detailed report: " << p << std::endl << std::endl;
339 
340  sout << formatter(conflict.begin(), conflict.end());
341  }
342 }
343 
344 } // namespace DB
345 } // namespace CASM
346 
347 #endif
Index count
#define ERR_MISSING_INPUT_FILE
Definition: errors.hh:19
#define ERR_INVALID_INPUT_FILE
Definition: errors.hh:16
const std::vector< fs::path > & pos_vec() const
Definition: import.cc:18
const fs::path & batch_path() const
Definition: import.cc:20
po::variables_map & vm()
Get the variables map.
Definition: Handlers.cc:310
Generic ConfigType-dependent part of Import.
Definition: Import.hh:66
ImportT(const PrimClex &primclex, const StructureMap< ConfigType > &mapper, ImportSettings const &_set, std::string report_dir)
Constructor.
Definition: Import_impl.hh:86
Parsing dictionary for constructing a DataFormatter<DataObject> object.
DataFormatter< DataObject > parse(const std::string &input) const
Use the vector of strings to build a DataFormatter<DataObject>
std::pair< iterator, bool > insert(const value_type &value)
Insert single value.
Definition: unique_map.hh:149
Definition: Log.hh:48
PrimClex is the top-level data structure for a CASM project.
Definition: PrimClex.hh:55
GenericDatumFormatter< std::string, Result > initial_path()
Definition: ConfigData.cc:70
GenericDatumFormatter< std::string, Result > fail_msg()
Definition: ConfigData.cc:81
std::pair< OutputIterator, int > construct_pos_paths(const PrimClex &primclex, const Completer::ImportOption &import_opt, OutputIterator result)
Construct pos_paths from input args --pos && --batch.
Definition: Import_impl.hh:22
Main CASM namespace.
Definition: APICommand.hh:8
Log & log()
Definition: Log.hh:424
INDEX_TYPE Index
For long integer indexing:
Definition: definitions.hh:39
T max(const T &A, const T &B)
Definition: CASM_math.hh:95
Log & err_log()
Definition: Log.hh:426
PrimClex * primclex
Definition: settings.cc:135
Log & log
Definition: settings.cc:139
pair_type calctype
Definition: settings.cc:143
Struct with optional parameters for Config/Data Import Specifies default parameters for all values,...
Definition: Import.hh:34