BornAgain  1.19.0
Simulate and fit neutron and x-ray scattering at grazing incidence
CsvImportAssistant.cpp
Go to the documentation of this file.
1 // ************************************************************************************************
2 //
3 // BornAgain: simulate and fit reflection and scattering
4 //
5 //! @file GUI/coregui/Views/ImportDataWidgets/CsvImportAssistant/CsvImportAssistant.cpp
6 //! @brief Implements class CsvImportAssistant
7 //!
8 //! @homepage http://www.bornagainproject.org
9 //! @license GNU General Public License v3 or higher (see COPYING)
10 //! @copyright Forschungszentrum Jülich GmbH 2018
11 //! @authors Scientific Computing Group at MLZ (see CITATION, AUTHORS)
12 //
13 // ************************************************************************************************
14 
16 // TODO avoid importing a cpp file
20 #include <QFileDialog>
21 #include <QFormLayout>
22 #include <QMenu>
23 #include <QMessageBox>
24 #include <QPushButton>
25 #include <QSettings>
26 #include <QTableWidget>
27 #include <QVBoxLayout>
28 #include <algorithm>
29 
30 CsvImportAssistant::CsvImportAssistant(const QString& file, const bool useGUI, QWidget* parent)
31  : m_fileName(file)
32  , m_csvFile(nullptr)
33  , m_csvArray()
34  , m_separator('\0')
35  , m_intensityColNum(-1)
36  , m_intensityMultiplier(1.0)
37  , m_coordinateColNum(-1)
38  , m_coordinateMultiplier(1.0)
39  , m_firstRow(-1)
40  , m_lastRow(-1)
41  , m_units(Axes::Units::NBINS)
42  , m_dataAvailable(false)
43 {
44  if (!loadCsvFile()) {
45  return;
46  }
47 
48  if (useGUI) {
49  runDataSelector(parent);
50  } else {
52  m_coordinateColNum = -1;
53  m_units = Axes::Units::NBINS;
54  m_firstRow = 0;
55  m_lastRow = int(m_csvFile->NumberOfRows() - 1);
56  m_dataAvailable = true;
57  }
58 }
59 
61 {
  // Runs a DataSelector dialog on the currently loaded array and, if the user accepts it,
  // copies the chosen units, first/last row and discarded rows into this object.
  // NOTE(review): the signature line of this body is not present in this listing; the member
  // index at the end of the file lists `void runDataSelector(QWidget *parent)` -- confirm.
  // NOTE(review): the embedded numbering skips lines 63 and 77-80, so some statements
  // (presumably the column/multiplier assignments taken from the selector) may be missing here.
62  DataSelector selector(m_csvArray, parent);
  // Preset the dialog with the separator guessed from the file contents.
64  selector.setSeparator(guessSeparator());
  // When the dialog reports a different separator: remember it, re-read the file with it and
  // push the refreshed array back into the dialog.
65  connect(&selector, &DataSelector::separatorChanged, this, [this, &selector](char newSep) {
66  if (newSep != m_separator) {
67  m_separator = newSep;
68  loadCsvFile();
69  selector.setDataArray(m_csvArray);
70  selector.setSeparator(newSep);
71  }
72  });
73 
74  int res = selector.exec();
75 
76  if (res == selector.Accepted) {
  // firstLine()/lastLine() are reduced by one before being stored as row numbers.
81  m_units = selector.units();
82  m_firstRow = int(selector.firstLine() - 1);
83  m_lastRow = int(selector.lastLine() - 1);
84  m_rowsToDiscard = selector.rowsToDiscard();
85  m_dataAvailable = true;
86  } else if (res == selector.Rejected) {
  // Dialog rejected: mark that no data is available.
87  m_dataAvailable = false;
88  return;
89  }
90 }
91 
92 void CsvImportAssistant::setIntensityColumn(int iCol, double multiplier)
93 {
94  m_intensityColNum = iCol - 1;
95  m_intensityMultiplier = multiplier;
96 }
97 void CsvImportAssistant::setCoordinateColumn(int iCol, Axes::Units units, double multiplier)
98 {
99  m_coordinateColNum = iCol - 1;
100  m_units = units;
101  m_coordinateMultiplier = multiplier;
102 }
104 {
  // Stores the first row to import as iRow - 1.
  // NOTE(review): the signature line of this body is not present in this listing; the member
  // index at the end of the file lists `void setFirstRow(int iRow)` -- confirm.
105  m_firstRow = iRow - 1;
106 }
108 {
  // Stores the last row to import as iRow - 1.
  // NOTE(review): the signature line of this body is not present in this listing; presumably
  // this is the counterpart of setFirstRow taking `int iRow` -- confirm against the header.
109  m_lastRow = iRow - 1;
110 }
111 
113 {
  // Reads the file m_fileName into m_csvFile, drops blank rows at the end and stores the
  // remaining rows in m_csvArray. Returns false (after showing an error box) when the file
  // cannot be read or contains no non-blank row; returns true otherwise.
  // NOTE(review): the signature line of this body is not present in this listing; the
  // constructor calls `loadCsvFile()` and tests its result, which matches this body -- confirm.
114 
115  try {
116  if (m_separator == '\0')
  // NOTE(review): the embedded numbering skips line 117. As listed, the file is only opened
  // when the separator is still '\0'; most likely the statement that belongs to the `if`
  // (presumably the separator guess) was lost -- confirm against the original file.
118  m_csvFile = std::make_unique<CSVFile>(m_fileName.toStdString(), m_separator);
119  } catch (...) {
  // Any exception while constructing the CSVFile is reported as a file-opening problem.
120  showErrorMessage("There was a problem opening the file \"" + m_fileName.toStdString()
121  + "\"");
122  return false;
123  }
124 
125  size_t lastRow = m_csvFile->NumberOfRows();
126 
127  if (lastRow < 1) {
128  CsvImportAssistant::showErrorMessage("The file exist but it seems to be empty");
129  return false;
130  }
131 
132  auto csvArray = m_csvFile->asArray();
133 
134  // Automatically ignore empty lines at the end:
  // A row counts as empty when the concatenation of all its cells is blank after trimming.
135  while (QString::fromStdString(accumulate(csvArray[lastRow - 1].begin(),
136  csvArray[lastRow - 1].end(), std::string("")))
137  .trimmed()
138  == "") {
139  lastRow--;
  // Every row was blank: treat the file as empty.
140  if (lastRow < 1) {
141  CsvImportAssistant::showErrorMessage("The file exist but it seems to be empty");
142  return false;
143  }
144  }
145 
146  // TODO: If separator is white space, ignore consecutive spaces...
147  // {
148  // }
149 
  // Keep only rows [0, lastRow) and make them the current array.
150  csv::DataArray tmp(csvArray.begin(), csvArray.begin() + int(lastRow));
151  m_csvArray.swap(tmp);
  // NOTE(review): the embedded numbering skips lines 152 and 154. As listed, the `if` below
  // guards `return true;`, so control would reach the end of the function without a return
  // value for any other separator; the statement that belongs to the `if` was most likely
  // lost -- confirm against the original file.
153  if (m_separator == ' ')
155 
156  return true;
157 }
158 
160 {
  // Clears the current selection state and reads the file again.
  // The result of loadCsvFile() is ignored here, so a failed reload is only visible through
  // the error box that loadCsvFile() shows.
  // NOTE(review): the signature line (name and return type) of this body is not present in
  // this listing -- confirm against the header.
161  resetSelection();
162  loadCsvFile();
163 }
164 
166 {
  // Builds the import result: a one-axis OutputData<double> whose axis holds the selected
  // coordinate values and whose raw data holds the selected intensity values, wrapped
  // together with m_units into an ImportDataInfo.
  // NOTE(review): the signature line of this body is not present in this listing; the member
  // index at the end of the file lists `ImportDataInfo fillData()` -- confirm.
167  // In case a 2d import is needed in the future
168  // Use ArrayUtils::Create2dData(vector<vector<double>>)
169  // ArrayUtils::Create2d
170  std::unique_ptr<OutputData<double>> resultOutputData;
171  resultOutputData = std::make_unique<OutputData<double>>();
172  std::vector<double> intensityValues;
173  std::vector<double> coordinateValues;
174 
  // Fills both vectors from the selected rows/columns of m_csvArray.
175  getValuesFromColumns(intensityValues, coordinateValues);
176 
  // The axis is named after the label registered for the current units in axisUnitLabel;
  // std::map::at throws std::out_of_range if m_units has no entry there.
177  const auto axisName = axisUnitLabel.at(m_units);
178  PointwiseAxis coordAxis(axisName, coordinateValues);
179  resultOutputData->addAxis(coordAxis);
180  resultOutputData->setRawDataVector(intensityValues);
181 
182  ImportDataInfo result(std::move(resultOutputData), m_units);
183  return result;
184 }
185 
186 void CsvImportAssistant::getValuesFromColumns(std::vector<double>& intensityValues,
187  std::vector<double>& coordinateValues)
188 {
189  bool intensityOk = true;
190  bool coordinateOk = true;
191  auto firstRow = size_t(m_firstRow);
192  auto lastRow = size_t(m_lastRow) + 1;
193  bool isCoordinateNeeded = m_coordinateColNum > -1;
194  double intensityValue = 0.;
195  double coordinateValue = 0.;
196 
197  for (size_t row = firstRow; row < lastRow; row++) {
198  if (std::find(m_rowsToDiscard.begin(), m_rowsToDiscard.end(), int(row))
199  == m_rowsToDiscard.end()) {
200  // Intensity Values:
201  QString intensityText =
202  QString::fromStdString(m_csvArray[row][size_t(m_intensityColNum)]);
203  intensityValue = intensityText.toDouble(&intensityOk);
204 
205  // Coordinate Values:
206  if (isCoordinateNeeded) {
207  QString coordinateText =
208  QString::fromStdString(m_csvArray[row][size_t(m_coordinateColNum)]);
209  coordinateValue = coordinateText.toDouble(&coordinateOk);
210  } else {
211  coordinateValue = row - firstRow;
212  }
213 
214  // Add them if they are both ok:
215  if (intensityOk && coordinateOk) {
216  intensityValues.push_back(m_intensityMultiplier * intensityValue);
217  coordinateValues.push_back(m_coordinateMultiplier * coordinateValue);
218  }
219  }
220  }
221 }
222 
224 {
  // Rebuilds m_csvArray without empty cells: every cell is trimmed, cells that are empty after
  // trimming are dropped, and each row is then padded with "" so that all rows are as long as
  // the longest remaining row.
  // NOTE(review): the signature line (name and return type) of this body is not present in
  // this listing -- confirm against the header.
225  if (m_csvArray.empty())
226  return;
227 
228  csv::DataArray buffer2d;
229  csv::DataRow buffer1d;
230  size_t nRows = m_csvArray.size();
  // The column count of the first row is used for every row below.
  // NOTE(review): assumes no row is shorter than row 0 -- confirm that callers guarantee this.
231  size_t nCols = m_csvArray[0].size();
232  size_t newNcols = 0;
233 
234  for (size_t i = 0; i < nRows; i++) {
235  buffer1d.clear();
236  for (size_t j = 0; j < nCols; j++) {
237  QString text = QString::fromStdString(m_csvArray[i][j]).trimmed();
238  if (text != "")
239  buffer1d.push_back(text.toStdString());
240  }
  // Track the longest compacted row; it defines the new column count.
241  newNcols = std::max(buffer1d.size(), newNcols);
242  buffer2d.push_back(buffer1d);
243  }
244 
245  if (buffer2d.empty()) {
246  m_csvArray.clear();
247  return;
248  }
249 
  // Pad shorter rows with empty cells so that the array stays rectangular.
250  for (size_t i = 0; i < nRows; i++)
251  while (buffer2d[i].size() < newNcols)
252  buffer2d[i].push_back("");
253 
254  // now buffer2d has the original array, without empty cells
  // The two assignments below are not read again before the function ends.
255  nRows = buffer2d.size();
256  nCols = buffer2d[0].size();
257 
258  m_csvArray.swap(buffer2d);
259 }
260 
262 {
  // Removes from m_csvArray every column whose cells are all blank. The array is walked
  // column by column into a transposed buffer that skips blank columns, and the buffer is
  // then transposed back into m_csvArray.
  // NOTE(review): the signature line (name and return type) of this body is not present in
  // this listing -- confirm against the header.
263 
264  if (m_csvArray.empty())
265  return;
266 
267  csv::DataArray buffer2d;
268  csv::DataRow buffer1d;
  // to_be_removed is not used anywhere below.
269  std::vector<int> to_be_removed;
270 
271  size_t nRows = m_csvArray.size();
272  size_t nCols = m_csvArray[0].size();
273 
  // NOTE(review): the embedded numbering skips line 274; the `if (...) {` that guards the
  // throw below (presumably a check that all rows have equal length) is missing from this
  // listing -- confirm against the original file.
275  throw std::runtime_error("All inner vectors should have the same length already.");
276  }
277 
278  // traverse the array columnwise -- this may be inefficient.
279  for (size_t j = 0; j < nCols; j++) {
280  buffer1d.clear();
281  for (size_t i = 0; i < nRows; i++) {
282  buffer1d.push_back(m_csvArray[i][j]);
283  }
  // A column is blank when the concatenation of all its cells is empty after trimming.
284  if (QString::fromStdString(accumulate(buffer1d.begin(), buffer1d.end(), std::string("")))
285  .trimmed()
286  == "")
287  continue;
288 
289  buffer2d.push_back(buffer1d);
290  }
291 
  // Every column was blank: nothing is left.
292  if (buffer2d.empty()) {
293  m_csvArray.clear();
294  return;
295  }
296 
297  // now buffer2d has the original array, without blank columns, transposed.
298  nCols = buffer2d.size();
299  nRows = buffer2d[0].size();
300 
301  // Save the modified array --i.e. transpose buffer2d
302  m_csvArray.clear();
303  for (size_t i = 0; i < nRows; i++) {
304  buffer1d.clear();
305  for (size_t j = 0; j < nCols; j++) {
306  buffer1d.push_back(buffer2d[j][i]);
307  }
308  m_csvArray.push_back(buffer1d);
309  }
310 }
311 
313 {
  // Guesses the column separator of the file m_fileName: counts how often each character with
  // code below 127 occurs in the file and returns the most frequent one among a fixed list of
  // candidate separators. A space is returned when no candidate occurs at all, and also in
  // place of a tab.
  // NOTE(review): the signature line of this body is not present in this listing; the value
  // returned is a char and runDataSelector passes `guessSeparator()` to a char parameter,
  // which matches this body -- confirm.
  // Occurrence counter, indexed by character code.
314  int frequencies[127] = {0};
315 
316  // The actual characters that may be realistically
317  // used as separators are only a handful...
318  // And this list seems already exaggerated.
319  std::vector<char> preferredSeparators;
320  preferredSeparators.push_back(' ');
321  preferredSeparators.push_back(',');
322  preferredSeparators.push_back(';');
323  preferredSeparators.push_back('|');
324  preferredSeparators.push_back(':');
325  preferredSeparators.push_back('\t');
326  // preferredSeparators.push_back('/');
327  // preferredSeparators.push_back('\\');
328  // preferredSeparators.push_back('_');
329  preferredSeparators.push_back('\'');
330  preferredSeparators.push_back('\"');
331 
332  // count number of occurrences of each char in the file:
333  char c;
334  std::ifstream is(m_fileName.toStdString());
335  while (is.get(c)) {
  // Characters whose converted code is 127 or above are not counted.
336  if (size_t(c) < 127)
337  frequencies[size_t(c)]++;
338  }
339  is.close();
340 
341  // set the guessed separator as the most frequent among the
342  // preferred separators. -- Some unavoidable hierarchy is
343  // present: characters with lower ascii code are preferred.
  // (The comparison below is strict, so on equal counts the candidate seen first, i.e. the
  // one with the lower code, is kept.)
344  char guessedSep = ' ';
345  int freq = 0;
346  for (char i = 0; i < 127; i++) {
347  if (std::find(preferredSeparators.begin(), preferredSeparators.end(), i)
348  != preferredSeparators.end())
349  if (frequencies[int(i)] > freq) {
350  freq = frequencies[int(i)];
351  guessedSep = i;
352  }
353  }
354 
355  // We don't like tabs, as we cannot write them in the GUI.
356  // The rest of the CsvImportAssistant and CsvReader should be already aware of this.
357  if (guessedSep == '\t')
358  guessedSep = ' ';
359 
360  return guessedSep;
361 }
362 
364 {
  // Returns true when every row of dataArray has the same number of cells as its first row
  // (all_of over an empty array yields true).
  // NOTE(review): the signature line of this body is not present in this listing; the member
  // index at the end of the file lists `bool hasEqualLengthLines(csv::DataArray &dataArray)`
  // -- confirm.
  // NOTE(review): the lambda captures dataArray by value, i.e. it holds its own copy of the
  // whole array; capturing by reference looks sufficient here -- consider changing.
365  auto tf = all_of(begin(dataArray), end(dataArray), [dataArray](const csv::DataRow& x) {
366  return x.size() == dataArray.front().size();
367  });
368  return tf;
369 }
370 
371 void CsvImportAssistant::showErrorMessage(std::string message)
372 {
373  QMessageBox msgBox;
374  msgBox.setText(QString::fromStdString(message));
375  msgBox.setIcon(msgBox.Critical);
376  msgBox.exec();
377 }
378 
380 {
  // Puts the selection state back to "nothing selected": empties the loaded array, unsets
  // both column indices and both row limits (-1), restores NBINS units and marks the data as
  // unavailable. The multipliers, the separator and m_rowsToDiscard are not touched here.
  // NOTE(review): the signature line of this body is not present in this listing; another
  // body in this file calls `resetSelection()` right before reloading, which matches this
  // body -- confirm.
381  m_csvArray.clear();
382  m_intensityColNum = -1;
383  m_coordinateColNum = -1;
384  m_firstRow = -1;
385  m_lastRow = -1;
386  m_units = Axes::Units::NBINS;
387  m_dataAvailable = false;
388 }
Implements class DataFormatUtils.
Defines class DataSelector.
const std::map< Axes::Units, const char * > axisUnitLabel
Defines StyleUtils namespace.
Wrapper for detector axes units, required for a better representation of detector axes units in pytho...
void setIntensityColumn(int iCol, double multiplier=1.0)
csv::DataArray m_csvArray
std::set< int > m_rowsToDiscard
bool hasEqualLengthLines(csv::DataArray &dataArray)
void setFirstRow(int iRow)
static void showErrorMessage(std::string message)
ImportDataInfo fillData()
std::unique_ptr< CSVFile > m_csvFile
CsvImportAssistant(const QString &file, const bool useGUI=false, QWidget *parent=nullptr)
void setCoordinateColumn(int iCol, Axes::Units units, double multiplier=1.0)
void runDataSelector(QWidget *parent)
void getValuesFromColumns(std::vector< double > &intensityVals, std::vector< double > &coordVals)
Dialog to hold DataSelector.
Definition: DataSelector.h:32
size_t lastLine() const
void setSeparator(char newSeparator)
Definition: DataSelector.h:50
int intensityColumn() const
Definition: DataSelector.h:38
double coordinateMultiplier() const
Definition: DataSelector.h:41
std::set< int > rowsToDiscard() const
Definition: DataSelector.h:42
double intensityMultiplier() const
Definition: DataSelector.h:40
Axes::Units units() const
size_t firstLine() const
void separatorChanged(char newSeparator)
int coordinateColumn() const
Definition: DataSelector.h:39
void setDataArray(csv::DataArray csvArray)
Definition: DataSelector.h:44
Carries information about loaded data.
Axis containing arbitrary (non-equidistant) coordinate values.
Definition: PointwiseAxis.h:37
Defines namespace Constants.
Constants and functions for physical unit conversions.
Definition: Units.h:30
std::vector< std::string > DataRow
Definition: CsvNamespace.h:27
std::vector< std::vector< std::string > > DataArray
Definition: CsvNamespace.h:26