CASM  1.1.0
A Clusters Approach to Statistical Mechanics
FileEnumerator.hh
Go to the documentation of this file.
1 #ifndef CASM_FileEnumerator
2 #define CASM_FileEnumerator
3 
4 #include <boost/filesystem.hpp>
5 
8 #include "casm/clex/PrimClex.hh"
9 #include "casm/clex/Supercell.hh"
11 
12 namespace CASM {
13 
14 // -- Get lists of files -----------------------------
15 
 // Lists all files in a CASM project, for use with 'casm files' command.
 //
 // NOTE(review): the line opening this class and listing lines 66-69 are
 // absent from this listing. The constructor initializes m_primclex, m_dir,
 // m_set and m_all_settings, so those members are presumably declared on the
 // absent lines -- confirm against the real header.
21  public:
 /// \brief A CASM project file enumerator
 ///
 /// \param _primclex The CASM project to enumerate files of
 /// \param _all_settings Defaults to false; the enumeration methods test it
 ///        before skipping entries that differ from the project defaults
 /// \param _relative Defaults to false; if true, _if_relative() strips the
 ///        project root directory from each path
23  FileEnumerator(const PrimClex &_primclex, bool _all_settings = false,
24  bool _relative = false);
25 
 /// \brief Enumerate all setting independent files
27  template <typename OutputIterator>
28  OutputIterator basic_files(OutputIterator result);
29 
 /// \brief Enumerate bset files
31  template <typename OutputIterator>
32  OutputIterator bset_files(OutputIterator result);
33 
 /// \brief Enumerate reference files
35  template <typename OutputIterator>
36  OutputIterator reference_files(OutputIterator result);
37 
 /// \brief Enumerate eci files
39  template <typename OutputIterator>
40  OutputIterator eci_files(OutputIterator result);
41 
 /// \brief Enumerate calculation settings files
43  template <typename OutputIterator>
44  OutputIterator calc_settings_files(OutputIterator result);
45 
 /// \brief Enumerate calculation status files
47  template <typename OutputIterator>
48  OutputIterator calc_status_files(OutputIterator result);
49 
 /// \brief Enumerate all training data files
51  template <typename OutputIterator>
52  OutputIterator all_calc_files(OutputIterator result);
53 
54  private:
 /// \brief Make paths relative to m_primclex.dir().root_dir() if m_relative
56  fs::path _if_relative(fs::path path);
57 
 /// \brief Output path if it exists
59  template <typename OutputIterator>
60  OutputIterator _if_exists(OutputIterator result, fs::path path);
61 
 /// \brief Get all regular files that exist in directory 'location'
63  template <typename OutputIterator>
64  OutputIterator _all_that_exist(OutputIterator result, fs::path location);
65 
 // If true, _if_relative() returns paths with the project root removed.
70  bool m_relative;
71 
 // Name lists filled once in the constructor from m_dir.all_bset(),
 // m_dir.all_calctype() and m_dir.all_property() respectively.
72  std::vector<std::string> m_all_bset;
73  std::vector<std::string> m_all_calctype;
74  std::vector<std::string> m_all_property;
75 };
76 
87 FileEnumerator::FileEnumerator(const PrimClex &_primclex, bool _all_settings,
88  bool _relative)
89  :
90 
91  m_primclex(_primclex),
92  m_dir(m_primclex.dir()),
93  m_set(m_primclex.settings()),
94  m_all_settings(_all_settings),
95  m_relative(_relative),
96  m_all_bset(m_dir.all_bset()),
97  m_all_calctype(m_dir.all_calctype()),
98  m_all_property(m_dir.all_property()) {}
99 
101 inline fs::path FileEnumerator::_if_relative(fs::path path) {
102  if (m_relative) {
103  auto a = m_primclex.dir().root_dir().string().size() + 1;
104  auto b = path.string().size();
105  return fs::path(path.string().substr(a, b));
106  }
107  return path;
108 }
109 
111 template <typename OutputIterator>
112 OutputIterator FileEnumerator::_if_exists(OutputIterator result,
113  fs::path path) {
114  if (fs::exists(path)) {
115  *result++ = _if_relative(path);
116  }
117  return result;
118 }
119 
121 template <typename OutputIterator>
122 OutputIterator FileEnumerator::_all_that_exist(OutputIterator result,
123  fs::path location) {
124  std::vector<fs::path> all;
125  std::string dir;
126 
127  // get all
128  if (!fs::exists(location)) {
129  return result;
130  }
131  fs::directory_iterator it(location);
132  fs::directory_iterator end_it;
133  for (; it != end_it; ++it) {
134  if (fs::is_regular_file(*it)) {
135  *result++ = _if_relative(it->path());
136  }
137  }
138  return result;
139 }
140 
/// \brief Enumerate all setting independent files
///
/// \param result Output iterator receiving the paths
/// \return 'result', advanced past everything written
///
/// Writes each path in a fixed list if it exists, then every regular file
/// found in m_dir.enumerator_plugins().
///
/// NOTE(review): listing lines 155 and 158-160 are absent here, so the
/// initializer list of 'v' is shown incomplete and unterminated.
152 template <typename OutputIterator>
153 OutputIterator FileEnumerator::basic_files(OutputIterator result) {
154  std::vector<fs::path> v{m_dir.prim(), // m_dir.PRIM(),
156  m_dir.config_list(),
157  m_dir.SCEL(),
 // Each listed path is written only if it exists (see _if_exists).
161  for (auto it = v.begin(); it != v.end(); ++it) {
162  result = _if_exists(result, *it);
163  }
 // The plugin location is scanned as a directory, not tested as one file.
164  result = _all_that_exist(result, m_dir.enumerator_plugins());
165  return result;
166 }
167 
/// \brief Enumerate bset files
///
/// \param result Output iterator receiving the paths
/// \return 'result', advanced past everything written
///
/// Visits every name in m_all_bset, or only the one equal to
/// m_set.default_clex().bset when m_all_settings is false. For each it writes
/// the bspecs, clust and basis paths that exist.
///
/// NOTE(review): listing line 185 is absent, so the statement starting with
/// 'result =' on line 184 is shown without its right-hand side.
174 template <typename OutputIterator>
175 OutputIterator FileEnumerator::bset_files(OutputIterator result) {
176  // bset dependent:
177  for (auto bset : m_all_bset) {
 // Unless all settings were requested, skip every non-default bset.
178  if (!m_all_settings && bset != m_set.default_clex().bset) {
179  continue;
180  }
181  result = _if_exists(result, m_dir.bspecs(bset));
182  result = _if_exists(result, m_dir.clust(bset));
183  result = _if_exists(result, m_dir.basis(bset));
184  result =
186  }
187  return result;
188 }
189 
/// \brief Enumerate reference files
///
/// \param result Output iterator receiving the paths
/// \return 'result', advanced past everything written
///
/// For each calctype that is not skipped, visits the refs returned by
/// m_dir.all_ref(calctype), or only the one equal to m_set.default_clex().ref
/// when m_all_settings is false. For each it writes the composition axes path
/// and the chemical reference path, if they exist.
///
/// NOTE(review): listing line 198 is absent, so the condition guarding the
/// 'continue' on line 199 is not shown. It presumably mirrors the ref check
/// below, applied to calctype -- confirm.
194 template <typename OutputIterator>
195 OutputIterator FileEnumerator::reference_files(OutputIterator result) {
196  // calctype / ref dependent
197  for (auto calctype : m_all_calctype) {
199  continue;
200  }
201  auto all_ref = m_dir.all_ref(calctype);
202  for (auto ref : all_ref) {
 // Unless all settings were requested, skip every non-default ref.
203  if (!m_all_settings && ref != m_set.default_clex().ref) {
204  continue;
205  }
 // NOTE(review): composition_axes() takes neither calctype nor ref, so the
 // same path is written once per ref reaching this point -- confirm that
 // duplicates are acceptable when m_all_settings is true.
206  result = _if_exists(result, m_dir.composition_axes());
207  result = _if_exists(result, m_dir.chemical_reference(calctype, ref));
208  }
209  }
210  return result;
211 }
212 
/// \brief Enumerate eci files
///
/// \param result Output iterator receiving the paths
/// \return 'result', advanced past everything written
///
/// If m_all_settings is false, tests a single path built from the default
/// cluster expansion settings. Otherwise loops over every combination of
/// property, calctype, ref, bset and eci name, and writes each m_dir.eci(...)
/// path that exists.
///
/// NOTE(review): listing lines 222-223 are absent, so the call building the
/// default path is shown only by its last argument.
216 template <typename OutputIterator>
217 OutputIterator FileEnumerator::eci_files(OutputIterator result) {
218  // eci
219  if (!m_all_settings) {
220  result = _if_exists(
221  result,
224  m_set.default_clex().eci));
225  } else {
 // The ref list depends on calctype; the eci list depends on all four
 // enclosing loop variables.
226  for (auto clex : m_all_property) {
227  for (auto calctype : m_all_calctype) {
228  auto all_ref = m_dir.all_ref(calctype);
229  for (auto ref : all_ref) {
230  for (auto bset : m_all_bset) {
231  auto all_eci = m_dir.all_eci(clex, calctype, ref, bset);
232  for (auto eci : all_eci) {
233  result =
234  _if_exists(result, m_dir.eci(clex, calctype, ref, bset, eci));
235  }
236  }
237  }
238  }
239  }
240  }
241  return result;
242 }
243 
/// \brief Enumerate calculation settings files
///
/// \param result Output iterator receiving the paths
/// \return 'result', advanced past everything written
///
/// For each calctype that is not skipped, writes every regular file in the
/// global calculation settings directory, then in each supercell's settings
/// directory, then handles each configuration.
///
/// NOTE(review): listing lines 250 and 265 are absent. The condition guarding
/// the 'continue' on line 251 and the start of the statement in the
/// configuration loop are therefore not shown.
247 template <typename OutputIterator>
248 OutputIterator FileEnumerator::calc_settings_files(OutputIterator result) {
249  for (auto calctype : m_all_calctype) {
251  continue;
252  }
253 
254  // calculation settings: global
255  result = _all_that_exist(result, m_dir.calc_settings_dir(calctype));
256 
257  // supercell level
258  for (const auto &scel : m_primclex.db<Supercell>()) {
259  result = _all_that_exist(
260  result, m_dir.supercell_calc_settings_dir(scel.name(), calctype));
261  }
262 
263  // configuration level
264  for (const auto &config : m_primclex.db<Configuration>()) {
266  config.name(), calctype));
267  }
268  }
269  return result;
270 }
271 
/// \brief Enumerate calculation status files
///
/// \param result Output iterator receiving the paths
/// \return 'result', advanced past everything written
///
/// For each calctype that is not skipped, loops over every configuration in
/// the database and writes its calculation status path if it exists.
///
/// NOTE(review): listing lines 278 and 285 are absent. The condition guarding
/// the 'continue' on line 279 and the second argument of the first
/// _if_exists call are therefore not shown.
275 template <typename OutputIterator>
276 OutputIterator FileEnumerator::calc_status_files(OutputIterator result) {
277  for (auto calctype : m_all_calctype) {
279  continue;
280  }
281 
282  // calculation summaries: properties.calc.json, status.json
283  for (const auto &config : m_primclex.db<Configuration>()) {
284  result = _if_exists(result,
286  result = _if_exists(result, m_dir.calc_status(config.name(), calctype));
287  }
288  }
289  return result;
290 }
291 
/// \brief Enumerate all training data files
///
/// \param result Output iterator receiving the paths
/// \return 'result', advanced past everything written
///
/// Walks m_dir.training_data() recursively and writes every regular file,
/// passed through _if_relative(). When m_all_settings is false, directories
/// named "calctype.<name>" can be excluded from the recursion.
///
/// NOTE(review): listing line 308 is absent, so the value that the directory
/// name suffix is compared against is not shown.
/// NOTE(review): no existence check on m_dir.training_data() is visible
/// before the iterator is constructed, unlike _all_that_exist() -- confirm
/// the behavior when that directory is missing.
295 template <typename OutputIterator>
296 OutputIterator FileEnumerator::all_calc_files(OutputIterator result) {
297  // get all files in 'training_data', recursively
298  fs::recursive_directory_iterator it(m_dir.training_data()), end;
 // Directory-name prefix that marks a per-calctype directory.
299  std::string pattern = "calctype.";
300  for (; it != end; ++it) {
301  // if not requesting all settings
302  if (!m_all_settings) {
303  // avoid recursing into other calctypes
304  if (fs::is_directory(*it)) {
305  std::string dir = it->path().filename().string();
 // The outer test matches the prefix; the inner one compares what follows it.
306  if (dir.substr(0, pattern.size()) == pattern) {
307  if (dir.substr(pattern.size(), dir.size()) !=
 // Keep the iterator from descending into the current directory.
309  it.no_push();
310  }
311  }
312  }
313  }
314 
315  if (fs::is_regular_file(*it)) {
316  *result++ = _if_relative(it->path());
317  }
318  }
319  return result;
320 }
321 
322 } // namespace CASM
323 
324 #endif
Specification of CASM project directory structure.
fs::path prim() const
Return prim.json path.
fs::path root_dir() const
Return casm project directory path.
std::vector< std::string > all_eci(std::string property, std::string calctype, std::string ref, std::string bset) const
Check filesystem directory structure and return list of all eci names.
fs::path configuration_calc_settings_dir(std::string configname, std::string calctype) const
Return calculation settings directory path, for configuration specific settings.
fs::path SCEL() const
Return SCEL path.
fs::path calculated_properties(std::string configname, std::string calctype) const
Return properties.calc.json file path.
fs::path crystal_point_group() const
Return crystal_point_group.json path.
fs::path config_list() const
Return master config_list.json file path.
fs::path project_settings() const
Return project_settings.json path.
fs::path clust(std::string bset) const
fs::path enumerator_plugins() const
Return enumerators plugin dir.
fs::path training_data() const
Return 'training_data' directory path.
fs::path chemical_reference(std::string calctype, std::string ref) const
Return chemical reference file path.
fs::path basis(std::string bset) const
fs::path eci(std::string property, std::string calctype, std::string ref, std::string bset, std::string eci) const
Returns path to eci.json.
fs::path calc_settings_dir(std::string calctype) const
Return calculation settings directory path, for global settings.
fs::path supercell_calc_settings_dir(std::string scelname, std::string calctype) const
Return calculation settings directory path, for supercell specific settings.
fs::path calc_status(std::string configname, std::string calctype) const
Return calculation status file path.
fs::path lattice_point_group() const
Return lattice_point_group.json path.
fs::path clexulator_src(std::string project_name, std::string bset) const
Returns path to clexulator source file.
fs::path composition_axes() const
Return composition axes file path.
fs::path factor_group() const
Return factor_group.json path.
fs::path bspecs(std::string bset) const
Return basis function specs (bspecs.json) file path.
std::vector< std::string > all_ref(std::string calctype) const
Check filesystem directory structure and return list of all ref names for a given calctype.
Lists all files in a CASM project, for use with 'casm files' command.
OutputIterator all_calc_files(OutputIterator result)
Enumerate all training data files.
const PrimClex & m_primclex
OutputIterator _if_exists(OutputIterator result, fs::path path)
output path if it exists
std::vector< std::string > m_all_property
FileEnumerator(const PrimClex &_primclex, bool _all_settings=false, bool _relative=false)
A CASM project file enumerator.
fs::path _if_relative(fs::path path)
make paths relative to m_primclex.dir().root_dir() if m_relative
ProjectSettings m_set
OutputIterator calc_status_files(OutputIterator result)
Enumerate calculation status files.
DirectoryStructure m_dir
OutputIterator reference_files(OutputIterator result)
Enumerate reference files.
OutputIterator eci_files(OutputIterator result)
Enumerate eci files.
OutputIterator _all_that_exist(OutputIterator result, fs::path location)
Get all regular files that exist in directory 'location'.
OutputIterator calc_settings_files(OutputIterator result)
Enumerate calculation settings files.
std::vector< std::string > m_all_calctype
OutputIterator bset_files(OutputIterator result)
Enumerate bset files.
std::vector< std::string > m_all_bset
OutputIterator basic_files(OutputIterator result)
Enumerate all setting independent files.
PrimClex is the top-level data structure for a CASM project.
Definition: PrimClex.hh:55
std::string project_name() const
Get project name.
ClexDescription const & default_clex() const
Get default ClexDescription.
Represents a supercell of the primitive parent crystal structure.
Definition: Supercell.hh:51
DB::Database< T > & db() const
Definition: PrimClex.cc:302
const DirectoryStructure & dir() const
Access DirectoryStructure object. Throw if not set.
Definition: PrimClex.cc:230
ConfigIO::GenericConfigFormatter< jsonParser > config()
Definition: ConfigIO.cc:777
Main CASM namespace.
Definition: APICommand.hh:8
pair_type ref
Definition: settings.cc:144
pair_type eci
Definition: settings.cc:146
pair_type calctype
Definition: settings.cc:143
DirectoryStructure const & dir
Definition: settings.cc:136
pair_type bset
Definition: settings.cc:145