#ifndef _SPIDER_H #define _SPIDER_H #include "extensions.h" #include "Domain.h" #include "DomainConnection.h" #include "TIPsDatabase.h" #include "Protocols.h" #include "ResourceUseEventSink.h" class SpiderEventSink; class Protocol; class Spider: public ResourceMonitor { enum status { created, working, waiting, stopped } m_status; Domain *m_domain; InternetURIRequest *m_currentPage; bool m_run; //for custom requests to Google about sites static DomainConnection m_googleDC; static Domain m_googleDomain; static Filter m_fGetGoogleCount; unsigned int googleCount(); //threaded run static THREAD_CALLBACK_TYPE staticrunasync( LPVOID lpParam); //lpParam = the Spider this static void staticrunCleanup(LPVOID lpParam); //lpParam = the Spider this int runasync(); //actual run function pthread_t m_runThread; //run thread handle SpiderEventSink *m_sm; //Manager receives events when pages/domain etc. are finished DomainConnection m_dc; //Spiders own low-level socket connection 1-1 with Domain static TIPsDatabase *m_db; //Database requests (can be HTTP/PostGRES) char *m_buffer; //buffer is passed to DC, Protocol and whoever wants to know about the last response const size_t m_buffersize; //buffer size: defaults to 60kb Protocol *m_currentProtocol; //Protocol handling the current request (transient, only last for 1 request) void attachToDomain(); void detachFromDomain(); //reports access all parts of all relevant classes friend class Report; friend class Report_Full; public: Spider(SpiderEventSink *_sm, Domain *_domain, TIPsDatabase *_db, const size_t _buffersize = 60*1024); //default: 60k ~Spider(); //accessors Domain *domain() {return m_domain;} //exceptions class StillTalking {}; int run(); //asynchronous run (using the private run functions) int stop(); //requests end asynchronously, use waitStop() to wait for the result int waitStop(); //needs to synchronously kill the thread... }; class SpiderEventSink: public ResourceMonitor { public: SpiderEventSink(ResourceMonitor *_re=0): ResourceMonitor(_re) {} virtual int finishedDomain(Spider *spider) {return 0;} virtual int finishedPage(Spider *spider, InternetResource *resource) {return 0;} }; #endif