A class for parsing CSV files (or tab-separated files, or whitespace-separated files, etc.). (This class does not support Mac line endings, so you should replace all '\r' with '\n' before using this class if your data comes from a Mac.)
More...
#include <GMatrix.h>
|
| GCSVParser () |
|
| ~GCSVParser () |
|
void | columnNamesInFirstRow () |
| Indicate that the first row specifies column names. More...
|
|
void | parse (GMatrix &outMatrix, const char *szFilename) |
| Load the specified file, and parse it. More...
|
|
void | parse (GMatrix &outMatrix, const char *pString, size_t len) |
| Parse the given string. More...
|
|
std::string & | report (size_t column) |
| Return a string that reports the status of the specified column. (This should only be called after parsing.) More...
|
|
void | setClearlyNumericalThreshold (size_t n) |
| Specify the number of unique numerical values before a column is deemed to be clearly numerical. More...
|
|
void | setMaxVals (size_t n) |
| Specify the maximum number of values to allow in a categorical attribute. The parsing of any columns that contain non-numerical values, and contain more than this number of unique values, will be aborted. More...
|
|
void | setNominalAttr (size_t attr) |
| Indicate that the specified attribute should be treated as nominal. More...
|
|
void | setRealAttr (size_t attr) |
| Indicate that the specified attribute should be treated as real. More...
|
|
void | setSeparator (char c) |
| Specify the separating character. '\0' indicates that an arbitrary amount of whitespace is used for separation. More...
|
|
void | setTimeFormat (size_t attr, const char *szFormat) |
| Specify that a certain attribute should be expected to be a date or time stamp that follows a given format. More...
|
|
void | tolerant () |
| Specify to ignore inconsistencies in the number of values in each row. (Using this is very dangerous.) More...
|
|
A class for parsing CSV files (or tab-separated files, or whitespace-separated files, etc.). (This class does not support Mac line endings, so you should replace all '\r' with '\n' before using this class if your data comes from a Mac.)
GClasses::GCSVParser::GCSVParser |
( |
| ) |
|
GClasses::GCSVParser::~GCSVParser |
( |
| ) |
|
void GClasses::GCSVParser::columnNamesInFirstRow |
( |
| ) |
|
|
inline |
Indicate that the first row specifies column names.
void GClasses::GCSVParser::parse |
( |
GMatrix & |
outMatrix, |
|
|
const char * |
szFilename |
|
) |
| |
Load the specified file, and parse it.
void GClasses::GCSVParser::parse |
( |
GMatrix & |
outMatrix, |
|
|
const char * |
pString, |
|
|
size_t |
len |
|
) |
| |
std::string& GClasses::GCSVParser::report |
( |
size_t |
column | ) |
|
|
inline |
Return a string that reports the status of the specified column. (This should only be called after parsing.)
void GClasses::GCSVParser::setClearlyNumericalThreshold |
( |
size_t |
n | ) |
|
|
inline |
Specify the number of unique numerical values before a column is deemed to be clearly numerical.
void GClasses::GCSVParser::setMaxVals |
( |
size_t |
n | ) |
|
|
inline |
Specify the maximum number of values to allow in a categorical attribute. The parsing of any columns that contain non-numerical values, and contain more than this number of unique values, will be aborted.
void GClasses::GCSVParser::setNominalAttr |
( |
size_t |
attr | ) |
|
Indicate that the specified attribute should be treated as nominal.
void GClasses::GCSVParser::setRealAttr |
( |
size_t |
attr | ) |
|
Indicate that the specified attribute should be treated as real.
void GClasses::GCSVParser::setSeparator |
( |
char |
c | ) |
|
|
inline |
Specify the separating character. '\0' indicates that an arbitrary amount of whitespace is used for separation.
void GClasses::GCSVParser::setTimeFormat |
( |
size_t |
attr, |
|
|
const char * |
szFormat |
|
) |
| |
Specify that a certain attribute should be expected to be a date or time stamp that follows a given format.
void GClasses::GCSVParser::tolerant |
( |
| ) |
|
|
inline |
Specify to ignore inconsistencies in the number of values in each row. (Using this is very dangerous.)
size_t GClasses::GCSVParser::m_clearlyNumericalThreshold |
|
protected |
bool GClasses::GCSVParser::m_columnNamesInFirstRow |
|
protected |
std::map<size_t, std::string> GClasses::GCSVParser::m_formats |
|
protected |
size_t GClasses::GCSVParser::m_maxVals |
|
protected |
std::vector<std::string> GClasses::GCSVParser::m_report |
|
protected |
char GClasses::GCSVParser::m_separator |
|
protected |
std::map<size_t, size_t> GClasses::GCSVParser::m_specifiedNominal |
|
protected |
std::map<size_t, size_t> GClasses::GCSVParser::m_specifiedReal |
|
protected |
bool GClasses::GCSVParser::m_tolerant |
|
protected |