cbutils.h 6.36 KB
/*=========================================================================
  Program:   cesbio-utils
  Language:  C++

  Copyright (c) CESBIO. All rights reserved.

  See cbutils-copyright.txt for details.

  This software is distributed WITHOUT ANY WARRANTY; without even
  the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
  PURPOSE.  See the above copyright notices for more information.

=========================================================================*/
#ifndef __CBUTILS_H
#define __CBUTILS_H
#include <algorithm>
#include <cassert>
#include <chrono>
#include <cstddef>
#include <cstdio>
#include <cstdlib>
#include <ctime>
#include <fstream>
#include <iomanip>
#include <iterator>
#include <random>
#include <regex>
#include <sstream>
#include <stdexcept>
#include <string>
#include <type_traits>
#include <vector>

#include <boost/algorithm/string.hpp>

#ifdef USE_FILESYSEM
#include <boost/filesystem.hpp>
namespace fs = boost::filesystem;
#endif

namespace cbutils
{
// ----------------- String manipulation ---------------------------------------
namespace string
{
// Splits a string on any of the characters listed in `separators`.
//
//   str        : text to tokenize.
//   separators : set of single-character delimiters (default: tab and space).
//
// Returns the tokens in order of appearance. Runs of adjacent separators are
// merged into one (boost::token_compress_on).
//
// Declared `inline` because it is defined in a header: without it, including
// this file from several translation units yields duplicate symbols at link
// time.
[[nodiscard]] inline std::vector<std::string> split(const std::string& str,
                                      const char* separators = "\t ")
{
  std::vector<std::string> tokens;
  boost::split(tokens, str, boost::is_any_of(separators),
               boost::token_compress_on);
  return tokens;
}
// Joins a vector of strings into a single string.
//
//   vos : the strings to concatenate, in order.
//   sep : character inserted between two consecutive strings (default: ' ').
//
// Returns the concatenation; an empty vector gives an empty string (the
// previous implementation read vos[0] unconditionally, which is undefined
// behaviour on an empty vector).
//
// Declared `inline` so the header can be included from several translation
// units without duplicate-symbol link errors.
[[nodiscard]] inline std::string join(const std::vector<std::string>& vos,
                                      char sep=' ')
{
  std::string joined;
  if(vos.empty())
    {
    return joined;
    }
  joined = vos.front();
  for(std::size_t i=1; i<vos.size(); ++i)
    {
    joined += sep;
    joined += vos[i];
    }
  return joined;
}
// True if the string contains at least one of the patterns.
//
//   s        : the text to search in.
//   patterns : candidate substrings; each is looked up with std::string::find,
//              i.e. as plain text, not as a regular expression.
//
// Returns false when `patterns` is empty. Both arguments are taken by const
// reference so that calling the function no longer copies the string and the
// whole pattern list.
//
// Declared `inline` so the header can be included from several translation
// units without duplicate-symbol link errors.
[[nodiscard]] inline bool contains(const std::string& s,
                                   const std::vector<std::string>& patterns)
{
  return std::any_of(patterns.begin(), patterns.end(),
                     [&s](const std::string& pat)
                     {
                       return s.find(pat) != std::string::npos;
                     });
}
} //end ns string
// ----------------- System calls ----------------------------------------------
  namespace system{
// Runs the command by sending it to the OS (std::system) and returns a string
// with its standard output.
//
// The output is captured by appending " > <temporary file>" to the command,
// reading that file back and then deleting it. The value returned by
// std::system is ignored, so a failing command simply yields whatever it
// managed to write (possibly an empty string).
//
// Throws std::runtime_error when no temporary file name can be generated:
// std::tmpnam returns a null pointer in that case, and building a std::string
// from it would be undefined behaviour.
//
// NOTE(review): std::tmpnam is open to a race between picking the name and
// creating the file, and `command` is passed to the shell verbatim -- do not
// feed it untrusted input.
//
// Declared `inline` so the header can be included from several translation
// units without duplicate-symbol link errors.
  inline std::string call(const std::string &command) {
    const char* generated_name = std::tmpnam(nullptr);
    if (generated_name == nullptr) {
      throw std::runtime_error("Could not generate a temporary file name");
    }
    const std::string tmp_filename{generated_name};
    const std::string command_redirect{command + " > " + tmp_filename};
    std::system(command_redirect.c_str());
    std::stringstream command_output;
    command_output << std::ifstream(tmp_filename).rdbuf();
    std::remove(tmp_filename.c_str());
    return command_output.str();
  }
  } //end ns system
// ----------------- File IO ---------------------------------------------------
namespace file
{
// Reads a text file and returns its lines, without the line terminators.
//
//   fname : path of the file to read.
//
// Returns one string per line. A file that cannot be opened, or an empty
// file, yields an empty vector (no exception is thrown).
//
// The loop is driven by the result of std::getline itself: testing
// ifs.good() *before* reading, as was done previously, appended a spurious
// empty string after the last line of any file ending with a newline.
//
// Declared `inline` so the header can be included from several translation
// units without duplicate-symbol link errors.
[[nodiscard]] inline std::vector<std::string> read_lines(const std::string& fname)
{
  std::ifstream ifs(fname);
  std::vector<std::string> lines{};
  std::string line{};
  while(std::getline(ifs, line))
    {
    lines.emplace_back(line);
    }
  return lines;
}
// Counts the number of columns of a delimited text file (CSV-like).
//
//   fname      : path of the file to inspect.
//   separators : characters accepted as column delimiters, forwarded to
//                string::split (default: tab and space).
//
// Only the first line of the file is read; the result is the number of
// tokens string::split finds on it.
//
// Throws std::runtime_error("Could not open <fname>") when the file cannot
// be opened.
//
// Declared `inline` so the header can be included from several translation
// units without duplicate-symbol link errors.
[[nodiscard]] inline std::size_t count_columns(const std::string& fname,
                                 const char* separators = "\t ")
{
  std::ifstream ifile(fname);
  if (!ifile.is_open())
    {
    throw std::runtime_error("Could not open "+fname);
    }
  std::string first_line{};
  std::getline(ifile, first_line);
  return string::split(first_line, separators).size();
}

#ifdef USE_FILESYSEM
// Lists the entries of a directory whose path matches a regular expression.
// Needs to link with -lboost_system -lboost_filesystem.
//
//   dir     : directory to scan (not recursive: a single directory_iterator
//             pass over `dir`).
//   pattern : std::regex pattern, applied with std::regex_search to the
//             path of each entry as produced by the iterator; the default
//             ".*" keeps every entry.
//
// Returns the matching paths as strings.
//
// The path text is taken from path().string(). The previous implementation
// streamed the entry into a stringstream, which yields a double-quoted
// string; the regex was therefore matched against the quoted form (so
// patterns anchored with ^ or $ could never match) and every '"' was erased
// afterwards, mangling names that legitimately contain one.
//
// Declared `inline` so the header can be included from several translation
// units without duplicate-symbol link errors.
[[nodiscard]] inline std::vector<std::string> list_files(const std::string& dir,
                                           const std::string& pattern = ".*")
{
  const std::regex regex_pattern(pattern);
  std::vector<std::string> result;
  for(const auto& entry : fs::directory_iterator(dir))
    {
    const std::string fname = entry.path().string();
    if(std::regex_search(fname, regex_pattern))
      {
      result.emplace_back(fname);
      }
    }

  return result;
}
#endif
}//end ns file
// ----------------- Sequence manipulation -------------------------------------
namespace seq
{
// Chops off the head of a sequence and returns its tail.
//
//   s : any sequence usable with std::begin/std::end whose type is
//       default-constructible and supports std::back_inserter (push_back).
//
// Returns a new sequence holding every element of `s` except the first one.
// An empty input yields an empty result; the previous version advanced the
// begin iterator unconditionally, which is undefined behaviour when `s` is
// empty.
template <typename Seq>
[[nodiscard]] Seq tail(const Seq& s)
{
  Seq result{};
  const auto first = std::begin(s);
  const auto stop = std::end(s);
  if(first == stop)
    {
    return result;
    }
  std::copy(std::next(first), stop, std::back_inserter(result));
  return result;
}
// Returns (a copy of) the last element of a sequence.
//
//   s : a container exposing cbegin()/cend() with bidirectional iterators.
//
// Throws std::out_of_range when `s` is empty; the previous version
// decremented cend() unconditionally, which is undefined behaviour on an
// empty container.
template <typename Seq>
[[nodiscard]] auto last(const Seq& s)
{
  if(s.cbegin() == s.cend())
    {
    throw std::out_of_range("cbutils::seq::last called on an empty sequence");
    }
  return *std::prev(s.cend());
}
// Arithmetic progression minvalue, minvalue+step, minvalue+2*step, ...
//
//   minvalue : first value of the progression.
//   maxvalue : upper bound; unlike Python's range() it is INCLUDED when the
//              progression lands on it (range(0, 5) gives 0,1,2,3,4,5).
//   step     : increment between consecutive values, must be > 0 (checked
//              with assert, so only in builds without NDEBUG).
//
// Returns (maxvalue-minvalue)/step + 1 elements (truncated towards zero),
// each one obtained by adding `step` to the previous one. When
// maxvalue < minvalue the result is empty.
//
// Fixes over the previous version:
//  * maxvalue < minvalue produced a zero or negative element count and then
//    wrote result[0] anyway (out-of-bounds write, or a huge allocation);
//  * for floating-point T the element count was itself a floating-point
//    value, so the fill loop could run one step past the truncated vector
//    size. The count is now converted to std::size_t once and reused.
template <typename T>
[[nodiscard]] std::vector<T> range(T minvalue, T maxvalue, T step=1)
{
  assert(step>0);
  if(maxvalue < minvalue)
    {
    return {};
    }
  // With maxvalue >= minvalue and step > 0 this is always at least 1.
  const auto nb_elements =
    static_cast<std::size_t>((maxvalue-minvalue)/step+1);
  std::vector<T> result(nb_elements);
  result[0]=minvalue;
  for(std::size_t i=1; i<nb_elements; ++i)
    {
    result[i] = result[i-1]+step;
    }

  return result;
}
}// end ns seq
// ----------------- Random numbers --------------------------------------------
namespace rand
{
// Draws an integer uniformly from the closed interval [minvalue, maxvalue]
// using the supplied engine. Selected through the std::true_type tag, i.e.
// when std::is_integral<T> holds.
//
// The distribution is built on every call: it used to be a function-local
// `static`, which froze the bounds of the very first call and silently
// ignored the minvalue/maxvalue of all later calls for the same T.
template <typename T>
[[nodiscard]] T uniform(T minvalue, T maxvalue, std::mt19937& engine,
                        std::true_type)
{
  std::uniform_int_distribution<T> dist(minvalue, maxvalue);
  return dist(engine);
}
// Draws a real number from std::uniform_real_distribution<T>(minvalue,
// maxvalue) using the supplied engine. Selected through the std::false_type
// tag, i.e. when T is not an integral type.
//
// As for the integral overload, the distribution is no longer `static`, so
// the bounds of each call are honoured.
template <typename T>
[[nodiscard]] T uniform(T minvalue, T maxvalue, std::mt19937& engine,
                        std::false_type)
{
  std::uniform_real_distribution<T> dist(minvalue, maxvalue);
  return dist(engine);
}
// Convenience entry point: draws a uniformly distributed value between
// minvalue and maxvalue, dispatching on std::is_integral<T> to the integer
// or real overload above.
//
// The engine is a function-local static std::mt19937 seeded once from
// std::random_device (one engine per instantiated T). No locking is done
// around it.
template <typename T>
[[nodiscard]] T uniform(T minvalue, T maxvalue)
{
  static std::random_device random_device;
  static std::mt19937 engine{random_device()};
  return uniform(minvalue, maxvalue, engine, std::is_integral<T>());
}
}// end ns rand
// ----------------- Miscellaneous ---------------------------------------------
namespace misc
{
// Returns the current local time formatted for use as a log-line prefix:
// std::put_time with "%F %T" (ISO date, then HH:MM:SS) followed by " -- ",
// e.g. "2024-01-31 17:05:09 -- ".
//
// NOTE(review): relies on std::localtime, which returns a pointer to shared
// static storage -- confirm this is never called concurrently from several
// threads.
//
// Declared `inline` so the header can be included from several translation
// units without duplicate-symbol link errors.
[[nodiscard]] inline std::string now_str()
{
  const auto current_time =
    std::chrono::system_clock::to_time_t(std::chrono::system_clock::now());
  std::stringstream stamp;
  stamp << std::put_time(std::localtime(&current_time), "%F %T") << " -- ";
  return stamp.str();
}
}// end ns misc
namespace operators{
// Prints the contents of a vector of printable things.
//
//   stream : destination stream.
//   vec    : elements to print; each one is written with its own operator<<
//            and followed by a single space (so the output of a non-empty
//            vector ends with a trailing space).
//
// Returns `stream` to allow chaining. The vector is taken by const
// reference; it used to be taken by value, which copied the whole vector on
// every print.
template<typename T>
std::ostream& operator <<(std::ostream& stream, const std::vector<T>& vec){
  for(const auto& t : vec) stream << t << " ";
  return stream;
}
} // end ns operators
}//end ns cbutils
#endif