#include "HTMLObjectParser.h"
#include "InternetResource.h"
#include "TIPsDatabase.h"
/**
 * Builds an object parser.
 *
 * All four arguments are passed unchanged to the HTMLParser base class;
 * this constructor sets up nothing of its own.
 */
HTMLObjectParser::HTMLObjectParser(const int _pagegroupid,
                                   TIPsDatabase *_db,
                                   const Domain *_domain,
                                   const int _type)
    : HTMLParser(_pagegroupid, _db, _domain, _type)
{
}
/** Destructor: performs no work in this class. */
HTMLObjectParser::~HTMLObjectParser()
{
}
/** @return the parser type constant for this class, DBF_PARSERTYPE_HTMLOBJECTPARSER. */
const int HTMLObjectParser::parsertype() const
{
    return DBF_PARSERTYPE_HTMLOBJECTPARSER;
}
/**
 * Asks the database to fill m_settings with the HTML object parser settings
 * stored for m_pagegroupid.
 *
 * @return always 0
 */
size_t HTMLObjectParser::init()
{
    // Domain / page-group specific settings.
    // NOTE: the contents of m_settings need to be freed; that is not done here.
    m_db->loadHTMLObjectParserSettings(m_pagegroupid, &m_settings);

    return 0;
}
const size_t HTMLObjectParser::parse(const InternetResource *ir, vector *objects) const {
const char *body = ir->body();
size_t bodysize = ir->size();
if (!bodysize) bodysize = strlen(body);
vector properties;
vector matches;
unsigned int iMatchCount;
Filter *f;
DBEntity *newentity = 0,
*entity;
const char *name, //property 1 always
*firstname;
StringMultiMapCI::const_iterator icoCurrent,
icoEnd = m_entities.end();
#ifdef _DEBUG
if (m_domain != ir->internetURIRequest()->domain()) {
DEBUGERROR0("[HTMLArticleParser]: DomainMismatch()");
throw DomainMismatch();
}
#endif
try {
//gather all char values into a vector
for (FilterGroup::const_iterator iF = m_settings.begin(); iF != m_settings.end(); iF++) {
f = iF->second;
//one and only one match allowed (replacement is done automatically)
//zero length string = invalid
iMatchCount = f->findAll(body, 0, &matches);
if (iMatchCount == 1 && *matches[0]) {
properties.push_back(matches[0]);
} else {
for (unsigned int iP = 0; iP < iMatchCount; iP++) free((void*)matches[iP]);
if (f->description()) { //the required field is stored in the filter description as 0 or 1
//indicates required field
DEBUGERROR("[%s]: cannot find property [%i] in [%s]", m_domain->m_domain, f->filterId(), ir->internetURIRequest()->absoluteURL());
DEBUGERROR("[%s]: MissingProperty()", m_domain->m_domain);
for (vector::iterator iP = properties.begin(); iP != properties.end(); iP++) free((void*)*iP);
throw MissingProperty();
} else {
//indicates not required field
properties.push_back(strdupCheck(""));
}
}
matches.clear();
}
if (!properties.size()) {
DEBUGERROR("[%s]: NoProperties! MissingProperty()", m_domain->m_domain);
throw MissingProperty();
}
name = properties[0]; //must always be the first property
if (!name || !*name) { //must always be a valid string
DEBUGERROR("[%s]: MissingProperty()", m_domain->m_domain);
for (vector::iterator iP = properties.begin(); iP != properties.end(); iP++) free((void*)*iP);
throw MissingProperty();
}
//create the object
switch (m_type) {
case DBF_OBJECT_COMPANY: {
if (properties.size() != 1) {
DEBUGERROR("[%s]: NotEnoughProperties() for type [%i]", m_domain->m_domain, m_type);
for (vector::iterator iP = properties.begin(); iP != properties.end(); iP++) free((void*)*iP);
throw NotEnoughProperties();
}
newentity = new Company(m_db, ir, name);
break;
}
case DBF_OBJECT_COUNTRY: {
break;
}
case DBF_OBJECT_PERSON: {
if (properties.size() != 2) {
DEBUGERROR("[%s]: NotEnoughProperties() for type [%i]", m_domain->m_domain, m_type);
for (vector::iterator iP = properties.begin(); iP != properties.end(); iP++) free((void*)*iP);
throw NotEnoughProperties();
}
//properties[0] = commonalias (Idi Amin)
//properties[1] = full name (Idi Amin Dada Oumee)
newentity = new Person(m_db, ir, name, properties[1]);
break;
}
case DBF_OBJECT_PRODUCT: {
switch (properties.size()) {
//case 1: {
//properties[0] = company name with product name
//newentity = new Product(m_db, ir, name);
//break;
//}
case 2: {
//properties[0] = product name
//properties[1] = company name
newentity = new Product(m_db, ir, name, properties[1]);
break;
}
default: {
DEBUGERROR("[%s]: NotEnoughProperties() for type [%i]", m_domain->m_domain, m_type);
for (vector::iterator iP = properties.begin(); iP != properties.end(); iP++) free((void*)*iP);
throw NotEnoughProperties();
}
}
break;
}
case DBF_OBJECT_PRODUCTTYPE: {
break;
}
default: {
DEBUGERROR("[%s]: UnknownObjectType(%i) during construction", m_domain->m_domain, m_type);
for (vector::iterator iP = properties.begin(); iP != properties.end(); iP++) free((void*)*iP);
throw UnknownObjectType();
break;
}
}
if (newentity) {
//search for the object in the cached in-memory map
//Database conducts a more thourough search
//this is just to try and weed out obvious matches
firstname = newentity->firstname();
icoCurrent = m_entities.find(firstname); //search for the company first name
if (icoCurrent != icoEnd) {
//cycle through the entities with this first word in length desc order
for (; icoCurrent != icoEnd && !_STRCASECMP(icoCurrent->first, firstname); icoCurrent++) {
entity = icoCurrent->second;
if (strlicmp(firstname, entity->name())) { //compare current entity
//found: delete and ignore
delete newentity;
newentity = 0;
break;
}
}
}
//add this to the list of objects to save to the DB
if (newentity) objects->push_back(newentity);
}
for (vector::iterator iP = properties.begin(); iP != properties.end(); iP++) free((void*)*iP);
}
catch (NotEnoughProperties) {}
catch (MissingProperty) {}
return objects->size();
}