-
Notifications
You must be signed in to change notification settings - Fork 165
/
Copy pathHDF5DataManager.cpp
199 lines (176 loc) · 7.8 KB
/
HDF5DataManager.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
/************************************************************************
* Copyright(c) 2009, One Unified. All rights reserved. *
* email: [email protected] *
* *
* This file is provided as is WITHOUT ANY WARRANTY *
* without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. *
* *
* This software may not be used nor distributed without proper license *
* agreement. *
* *
* See the file LICENSE.txt for redistribution information. *
************************************************************************/
#include <cassert>
#include <iostream>
#include <stdexcept>
#include "HDF5DataManager.h"
namespace ou { // One Unified
namespace tf { // TradeFrame
// Default HDF5 file name; used by the single-argument constructor when no file name is supplied.
const std::string HDF5DataManager::c_sH5FileName( "TradeFrame.hdf5" );
//H5::H5File HDF5DataManager::m_H5File;
//unsigned int HDF5DataManager::m_RefCount = 0;
//HDF5DataManager HDF5DM; // statically defined, so at least one instance is always present
// (2012/08/12: why?) because the code is inefficient: the file is opened/closed repeatedly; need to be able to pass a handle for operations.
// needs a good rethink and re-architect for file handle handling
// Convenience constructor: delegates to the two-argument constructor,
// supplying the default file name c_sH5FileName.
HDF5DataManager::HDF5DataManager( enumFileOptionType fot )
: HDF5DataManager( fot, c_sH5FileName )
{}
// Opens the HDF5 file named sH5FileName in the mode selected by fot
// (RO -> read-only, RDWR -> read/write).
//
// If that first attempt throws anything, the file is instead opened with
// H5F_ACC_CREAT | H5F_ACC_RDWR and the base groups "/bar", "/bar/86400" and
// "/symbol" are created in it.
//
// Any exception escaping the fallback is swallowed: a message is written to
// std::cout and construction completes regardless.
//
// Notes carried over from the original implementation:
//  - the family (split-file) driver is deliberately not configured, as the
//    HDF5 precompiled utilities cannot handle split files;
//  - multiple instances are allowed, as a number of different files may be in use.
HDF5DataManager::HDF5DataManager( enumFileOptionType fot, const std::string& sH5FileName ) {

  assert( 0 < sH5FileName.size() );

  H5::FileAccPropList plAccess;  // default file access properties

  try {
    try {
      // first attempt: open an existing file in the requested mode
      if ( RO == fot ) {
        m_H5File.openFile( sH5FileName, H5F_ACC_RDONLY, plAccess );
      }
      else if ( RDWR == fot ) {
        m_H5File.openFile( sH5FileName, H5F_ACC_RDWR, plAccess );
      }
    }
    catch (...) {
      // fallback: create the file, then lay down the base group structure
      m_H5File.openFile( sH5FileName, H5F_ACC_CREAT | H5F_ACC_RDWR, plAccess );

      const char* const rszBaseGroup[] = { "/bar", "/bar/86400", "/symbol" };
      for ( const char* szGroup : rszBaseGroup ) {
        H5::Group group( GetH5File()->createGroup( szGroup ) );
        group.close();
      }
    }
  }
  catch (...) {
    std::cout << "problems with HDF5 system" << std::endl;
  }
}
// Closes the HDF5 file held by this instance.
//
// Destructors are implicitly noexcept, so an exception escaping from
// m_H5File.close() would terminate the program.  Any failure is therefore
// caught here and reported on std::cout instead of being propagated.
HDF5DataManager::~HDF5DataManager() {
  try {
    m_H5File.close();
  }
  catch ( const H5::Exception& e ) {
    std::cout << "HDF5DataManager::~HDF5DataManager close failed: " << e.getDetailMsg() << std::endl;
  }
  catch ( ... ) {
    std::cout << "HDF5DataManager::~HDF5DataManager unknown error on close" << std::endl;
  }
}
// Flushes the file returned by GetH5File(), using the H5F_SCOPE_GLOBAL scope.
void HDF5DataManager::Flush() {
GetH5File()->flush( H5F_SCOPE_GLOBAL );
}
bool HDF5DataManager::GroupExists( const std::string &sGroup ) {
bool bGroupExists = false;
try {
H5::Group g = GetH5File()->openGroup( sGroup );
g.close();
bGroupExists = true;
}
catch ( const H5::Exception /* &e */ ) {
// group doesn't exist so just ignore
}
catch ( ... ) {
std::cout << "HDF5DataManager::GroupExists unknown error" << std::endl;
}
return bGroupExists;
}
// Splits sPath on '/' and hands each '/'-terminated component to f, in order.
//
// sPath is expected to start with '/' (asserted); the leading '/' is skipped.
// Paths that are empty or consist of a single character are ignored.
// Only components that are followed by a '/' are reported: for "/symbol/G/O/"
// f receives "symbol", "G", "O"; a trailing component without a closing '/'
// (e.g. "GOOG" in "/symbol/G/O/GOOG") is not passed to f.
void HDF5DataManager::IteratePathParts( const std::string& sPath, fCallbackIteratorPath_t&& f ) {

  if ( sPath.size() < 2 ) return;  // nothing beyond an optional leading '/'

  assert( '/' == sPath[ 0 ] );

  std::string::size_type ixPartStart = 1;  // step over the leading '/'
  for (;;) {
    const std::string::size_type ixNextSlash = sPath.find( '/', ixPartStart );
    if ( std::string::npos == ixNextSlash ) break;  // no further terminated component

    std::string sPart( sPath.substr( ixPartStart, ixNextSlash - ixPartStart ) );
    f( sPart );

    ixPartStart = ixNextSlash + 1;
    if ( ixPartStart >= sPath.size() ) break;  // ran off the end of the path
  }
}
void HDF5DataManager::AddGroup( const std::string &sGroupPath ) { // needs to have terminating '/'
// /symbol, /symbol/G, /symbol/G/O, /symbol/G/O/GOOG
std::string sSubPath;
// ensure that appropriate group has been created in the file
std::string::size_type ixSlash = sGroupPath.find( '/', 1 ); // ignore initial / character
while ( std::string::npos != ixSlash ) {
sSubPath = sGroupPath.substr( 0, ++ixSlash ); // use ixSlash as count here
try {
try {
H5::Group g = GetH5File()->openGroup( sSubPath );
g.close();
} // one of these when doesn't exist
catch ( H5::FileIException e ) {
// what is the specific exception here? -- look for specific text string
H5::Group g = GetH5File()->createGroup( sSubPath );
g.close();
}
catch ( H5::Exception e ) { // what is the specific exception here?
std::cout << "HDF5DataManager::AddGroup H5::Exception for '" << sGroupPath << "', " << e.getDetailMsg() << std::endl;
}
}
catch (...) {
throw std::runtime_error( "HDF5DataManager::AddGroup has creation problems" );
}
ixSlash = sGroupPath.find( '/', ixSlash ); // use incremented ixSlash here as new start index
}
}
// Ensures the group "/symbol/<sSymbol>" exists in the file.
//
// AddGroup() only processes path prefixes that end in '/', so the path is
// passed with a terminating '/'; without it only "/symbol/" would be ensured
// and the symbol's own group would never be created.
void HDF5DataManager::AddGroupForSymbol( const std::string &sSymbol ) {
  AddGroup( "/symbol/" + sSymbol + "/" );
}
// Callback invoked from within an exception handler for one item of the HDF5
// error stack: writes the level n together with the item's file name,
// function name and description to std::cout as a single line.
// client_data is not used.  Always returns 1.
herr_t HDF5DataManager::PrintH5ErrorStackItem( int n, H5E_error_t *err_desc, void *client_data ) {
  const H5E_error_t& item = *err_desc;

  std::cout << "H5 Error Level " << n << ": ";
  std::cout << item.file_name << "::" << item.func_name << "::" << item.desc;
  std::cout << std::endl;

  return 1;
}
// Builds the daily-bar path for sSymbol into sPath:
//   /bar/86400/<1st char>/<2nd char>/<symbol>
// For a one-character symbol the first character is used for both levels,
// e.g. "GOOG" -> "/bar/86400/G/O/GOOG", "F" -> "/bar/86400/F/F/F".
// An empty sSymbol causes std::string::substr to throw std::out_of_range.
void HDF5DataManager::DailyBarPath( const std::string &sSymbol, std::string &sPath ) {
  // index of the character used for the second directory level
  const std::string::size_type ixSecondLevel = ( 1 == sSymbol.length() ) ? 0 : 1;

  sPath.assign( "/bar/86400/" );
  sPath += sSymbol.substr( 0, 1 );
  sPath += "/";
  sPath += sSymbol.substr( ixSecondLevel, 1 );
  sPath += "/";
  sPath += sSymbol;
}
} // namespace tf
} // namespace ou
/*
If you are only writing to very small datasets, then the default
chunk cache size (1 MB) is most likely large enough, since this limit
is applied to each dataset individually. However, if you are regularly
rewriting/reading the same portions of the dataset, and it can grow beyond
1 MB then you may see a benefit from increasing the cache size.
Depending on your chunk size, you may also want to increase the number of
elements in the chunk cache from the default 521 (make sure it stays a prime number).
Be careful about having too many datasets open at once though, as the limit is 1 MB
for each dataset. So if you have several million datasets open you potentially
have several million megabytes of cache.
The chunk size should align as closely as possible to your typical selection for
writing (or reading). This minimizes the amount of costly scattering as well as
wasted space in the cache. However you should not set it too small, in order to avoid excessive overhead.
Thanks,
-Neil Fortner
Neil Fortner [[email protected]]
*/