tidies up UrlReader component

This commit is contained in:
tosh 2013-05-15 15:29:02 +02:00
parent 9765ce7b9c
commit 6a47db91da
2 changed files with 129 additions and 74 deletions

View file

@ -10,62 +10,83 @@
#include <new>
#ifdef _WIN32
#define NOCURL_IN_WINDOWS
#endif
#include <sys/types.h>
#include <sys/stat.h>
#ifndef NOCURL_IN_WINDOWS
#include <curl/curl.h>
size_t const UrlReader::max_read_ahead = CURL_MAX_WRITE_SIZE;
#else
size_t const UrlReader::max_read_ahead = 0;
#endif
#include "shared_Log.h"
using shared_lib::printLog;
char const* const UrlReader::success = "UrlReader: Success!";
char const* const UrlReader::success_cached = "UrlReader:: Using local file.";
#ifndef _WIN32
// (Windows port is incomplete and the build files do not support CURL, yet)
#include <curl/curl.h>
//
// ATTENTION: A certain part of the implementation lives in inlined code
// (see the bottom of the header file).
//
// Why? Because it allows stream parsing without excessive call overhead
// (otherwise one static and one dynamic call per character if the parser
// just reads one character at a time).
//
// Here is an overview of the code structure:
//
// readUrl
// -> transferBegin (sets up state)
// -> perform (starts CURL transfer)
// -> (specialized, type-erased) callback_template
// -> getInfo (fetches HTTP header, possibly initiates caching)
// -> stream.begin (client code - called once)
// -> feedBuffered (the buffering logic)
// -> stream.transfer (client code - called repeatedly)
// -> stream.end (client code - called when the transfer is done)
// -> transferEnd (closes cache file, if used)
//
// "->" means "calls or inlines", here
//
// Size in bytes of the buffers this class allocates (_arrXtra and
// _arrCacheRdBuf are both created as char[max_read_ahead]).
size_t const UrlReader::max_read_ahead = CURL_MAX_WRITE_SIZE;
// Status strings stored in _strError. They are compared by address
// (e.g. `_strError == success`), not by content, so code must always
// assign these very constants and never a copy of their text.
char const* const UrlReader::success = "UrlReader: Success!";
char const* const UrlReader::success_cached = "UrlReader: Using local file.";
char const* const UrlReader::error_init_failed = "UrlReader: Initialization failed.";
char const* const UrlReader::error_aborted = "UrlReader: Processing error.";
char const* const UrlReader::error_buffer_overflow = "UrlReader: Buffer overflow.";
char const* const UrlReader::error_leftover_input = "UrlReader: Incomplete processing.";
#define hnd_curl static_cast<CURL*>(_ptrImpl)
#define _ptrCurl static_cast<CURL*>(_hndCurl)
// Allocates the read-ahead buffer and creates the CURL easy handle.
//
// Nothing is thrown on failure: _strError is set to error_init_failed and
// _hndCurl stays null, which makes readUrl() return false right away.
UrlReader::UrlReader()
    : _hndCurl(0l), _arrXtra(0l), _strError(0l), _arrCacheRdBuf(0l) {

    // non-throwing allocation, failure is reported through _strError
    _arrXtra = new(std::nothrow) char[max_read_ahead];
    if (! _arrXtra) { _strError = error_init_failed; return; }

    _hndCurl = curl_easy_init();
    if (! _hndCurl) { _strError = error_init_failed; return; }

    // do not let CURL install signal handlers
    curl_easy_setopt(_ptrCurl, CURLOPT_NOSIGNAL, 1l);
    // have the transfer fail on HTTP error status codes
    curl_easy_setopt(_ptrCurl, CURLOPT_FAILONERROR, 1l);
    // request the remote modification time (queried via CURLINFO_FILETIME)
    curl_easy_setopt(_ptrCurl, CURLOPT_FILETIME, 1l);
    // empty string: accept every content encoding CURL was built with
    curl_easy_setopt(_ptrCurl, CURLOPT_ENCODING, "");
}
// Releases the buffers and the CURL easy handle, if one was created.
UrlReader::~UrlReader() {

    // delete[] on a null pointer is a no-op, so no checks are needed here
    delete[] _arrXtra;
    delete[] _arrCacheRdBuf;

    // the handle is null when construction failed; clean up exactly once
    if (_hndCurl) {
        curl_easy_cleanup(_ptrCurl);
    }
}
bool UrlReader::perform(char const* url, transfer_callback* cb) {
#ifndef NOCURL_IN_WINDOWS
void UrlReader::perform(char const* url, transfer_callback* cb) {
curl_easy_setopt(hnd_curl, CURLOPT_URL, url);
curl_easy_setopt(hnd_curl, CURLOPT_WRITEFUNCTION, cb);
curl_easy_setopt(hnd_curl, CURLOPT_WRITEDATA, this);
curl_easy_setopt(_ptrCurl, CURLOPT_URL, url);
curl_easy_setopt(_ptrCurl, CURLOPT_WRITEFUNCTION, cb);
curl_easy_setopt(_ptrCurl, CURLOPT_WRITEDATA, this);
CURLcode rc = curl_easy_perform(hnd_curl);
CURLcode rc = curl_easy_perform(_ptrCurl);
if (rc == CURLE_OK)
{
@ -74,20 +95,33 @@ bool UrlReader::perform(char const* url, transfer_callback* cb) {
}
else if (_strError == success)
_strError = curl_easy_strerror(rc);
return rc == CURLE_OK;
#else
return false;
#endif
}
void UrlReader::getinfo(char const*& url,
char const*& type, int64_t& length, int64_t& stardate) {
#ifndef NOCURL_IN_WINDOWS
void UrlReader::transferBegin(void* stream, char const* cacheFile) {
_strError = success;
_ptrStream = stream;
_strCacheFile = cacheFile;
_ptrCacheFile = 0l;
_valCacheMode = no_cache;
_valXtraSize = ~size_t(0);
}
void UrlReader::getInfo(char const*& url,
char const*& type, int64_t& length, int64_t& stardate) {
// fetch information from HTTP header
double clen;
long time;
curl_easy_getinfo(hnd_curl, CURLINFO_FILETIME, & time);
curl_easy_getinfo(_ptrCurl, CURLINFO_FILETIME, & time);
curl_easy_getinfo(_ptrCurl, CURLINFO_EFFECTIVE_URL, & url);
curl_easy_getinfo(_ptrCurl, CURLINFO_CONTENT_TYPE, & type);
curl_easy_getinfo(_ptrCurl, CURLINFO_CONTENT_LENGTH_DOWNLOAD, & clen);
length = static_cast<int64_t>(clen);
curl_easy_getinfo(_ptrCurl, CURLINFO_FILETIME, & time);
stardate = time;
printLog("UrlReader: Ready to transfer from URL '%s'\n", url);
// check caching file time whether we actually want to download anything
if (_strCacheFile != 0l) {
@ -96,7 +130,7 @@ void UrlReader::getinfo(char const*& url,
if (time > s.st_mtime) {
// file on server is newer -> update cache file
_ptrCacheFile = fopen(_strCacheFile, "wb");
printf("From URL: ");
printLog("UrlReader: Also writing content to cache file '%s'\n", _strCacheFile);
if (_ptrCacheFile != 0l) {
_valCacheMode = cache_write;
}
@ -105,26 +139,38 @@ void UrlReader::getinfo(char const*& url,
if (! _arrCacheRdBuf) {
_arrCacheRdBuf = new (std::nothrow) char[max_read_ahead];
if (! _arrCacheRdBuf) {
_valCacheMode = no_cache;
// out of memory, no caching, have CURL catch it
return;
}
}
_ptrCacheFile = fopen(_strCacheFile, "rb");
printf("From file: ");
printLog("UrlReader: Delivering cached content from file '%s'\n", _strCacheFile);
if (_ptrCacheFile != 0l) {
_valCacheMode = cache_read;
}
// override error code returned by CURL when we abort the download
_strError = success_cached;
}
}
curl_easy_getinfo(hnd_curl, CURLINFO_EFFECTIVE_URL, & url);
curl_easy_getinfo(hnd_curl, CURLINFO_CONTENT_TYPE, & type);
curl_easy_getinfo(hnd_curl, CURLINFO_CONTENT_LENGTH_DOWNLOAD, & clen);
length = static_cast<int64_t>(clen);
curl_easy_getinfo(hnd_curl, CURLINFO_FILETIME, & time);
stardate = time;
#endif
}
// Closes the cache file, if one was opened during the transfer.
void UrlReader::transferEnd() {

    if (_ptrCacheFile == 0l) {
        return;
    }
    fclose(_ptrCacheFile);
    // forget the handle so that a repeated call cannot close it twice
    _ptrCacheFile = 0l;
}
#else // no-op version for incomplete Windows build:
// The CURL handle stays null, so readUrl() returns false without calling
// any of the functions below. All pointer members are nulled so that
// getError() never hands out an uninitialized pointer.
UrlReader::UrlReader()
    : _hndCurl(0l), _arrXtra(0l), _strError(0l), _arrCacheRdBuf(0l) { }
// nothing was allocated, nothing to release
UrlReader::~UrlReader() { }
// the remaining entry points exist only to satisfy the linker
void UrlReader::perform(char const* url, transfer_callback* cb) { }
void UrlReader::transferBegin(void* stream, char const* cacheFile) { }
void UrlReader::getInfo(char const*& url, char const*& type,
        int64_t& length, int64_t& stardate) { }
void UrlReader::transferEnd() { }
#endif

View file

@ -22,7 +22,7 @@ class UrlReader {
enum CacheMode { no_cache, cache_write, cache_read };
void* _ptrImpl;
void* _hndCurl;
char* _arrXtra;
char const* _strError;
void* _ptrStream;
@ -145,13 +145,18 @@ class UrlReader {
UrlReader(UrlReader const&); // = delete;
UrlReader& operator=(UrlReader const&); // = delete;
inline bool isSuccess();
// entrypoints to compiled code
typedef size_t transfer_callback(char*, size_t, size_t, void*);
bool perform(char const* url, transfer_callback* transfer);
void transferBegin(void* stream, char const* cacheFile);
void transferEnd();
void getinfo(char const*& url,
void perform(char const* url, transfer_callback* transfer);
void getInfo(char const*& url,
char const*& type, int64_t& length, int64_t& stardate);
// synthesized callback
@ -163,33 +168,37 @@ class UrlReader {
char* input, size_t size);
};
template< class ContentStream >
bool UrlReader::readUrl(char const* url, ContentStream& s, char const* cacheFile) {
if (! _ptrImpl) return false;
_strCacheFile = cacheFile;
_ptrCacheFile = 0l;
_valCacheMode = no_cache; // eventually set later
_strError = success;
_ptrStream = & s;
_valXtraSize = ~size_t(0);
this->perform(url, & callback_template<ContentStream>);
s.end(_strError == success);
if (_ptrCacheFile != 0l) {
fclose(_ptrCacheFile);
}
// Returns the current status string (one of the static UrlReader messages
// or a string obtained from CURL). The pointer is null as long as a
// successfully constructed reader has not started a transfer yet.
inline char const* UrlReader::getError() const {
    return this->_strError;
}

// Tells whether the status is one of the two success markers. The check
// compares addresses, not string contents.
inline bool UrlReader::isSuccess() {
    return this->_strError == success || this->_strError == success_cached;
}
template< class ContentStream >
bool UrlReader::readUrl(char const* url, ContentStream& s, char const* cacheFile) {
if (! _hndCurl) return false;
this->transferBegin(& s, cacheFile);
this->perform(url, & callback_template<ContentStream>);
this->transferEnd();
bool ok = isSuccess();
s.end(ok);
return ok;
}
// Records an error unless one has been recorded already, so the first
// failure is the one that gets reported.
//
// staticCstring  message to store; only the pointer is kept, so the string
//                must stay valid for as long as getError() may be called
inline void UrlReader::setError(char const* staticCstring) {
    if (this->isSuccess())
        this->_strError = staticCstring;
}
template< class Stream >
size_t UrlReader::feedBuffered(Stream* stream, char* input, size_t size) {
size_t inputOffset = 0u;
while (true) {
@ -263,7 +272,7 @@ size_t UrlReader::callback_template(char *input, size_t size, size_t nmemb, void
// extract meta information and call 'begin'
char const* url, * type;
int64_t length, stardate;
me->getinfo(url, type, length, stardate);
me->getInfo(url, type, length, stardate);
if (me->_valCacheMode != cache_read) {
stream->begin(url, type, length, stardate);
}