/* This file is part of the Connector Framework
 * Copyright (C) 2008-2013 Index Data
 * See the file LICENSE for details.
 *
 * This is the CF-specific part of the cproxy filter. Overview:
 *  - Extract session info from the URL
 *  - Read session info
 *  - Merge cookies from request and the session, and other headers
 *  - Set things up for the rewrite module
 *  - Pass the packet to the rewrite module
 *  - Mangle cookies before returning to the browser
 * 
 * Plus, there are some special cases:
 *  - If we don't have a session number in the URL, check the referrer
 *    header, and if we find one there, redirect to a proper URL
 *  - If that didn't help, check if we have a cookie with the session info.
 *    Redirect as above.
 *  - If we don't have a session file, but do have a .p file from a Z-request,
 *    invoke CF with a content connector to create a session.
 * 
 * Then there are the debug options
 *  - Setting a debug flag in the config, or in the URL turns on debugging, and
 *    - Forces content type to text/plain, so we can see the output
 *    - Collects debug info into a buffer whenever something happens
 *    - Also outputs it all to yazlog
 *    - Prepends the debug buffer to the actual output
 *  - Creating a dump directory next to the session file, will dump all debug info
 *    from that session in the directory, one file per request
 * 
 * Later
 *  - More debug options?
 *  - Get better cookies from CF, use also the path for checking
 *  - Parse response cookie, see if the server tries to kill a cookie
 *    (by setting expiration etc). Mark in the session file (append to it)
 */

#include <iostream>
#include <fstream>

#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <errno.h>
#include <dirent.h>

#include <metaproxy/filter.hpp>
#include <metaproxy/package.hpp>
#include <metaproxy/util.hpp>
#include "filter_cproxy.hpp"

#include <yaz/zgdu.h>
#include <yaz/log.h>
#include <yaz/url.h>

#include <boost/thread/mutex.hpp>
#include <boost/regex.hpp>
#include <boost/lexical_cast.hpp>
#include <boost/algorithm/string/predicate.hpp>

#include <list>
#include <map>

#if HAVE_SYS_TYPES_H
#include <sys/types.h>
#endif

#include <sys/stat.h>

// Path literal for the cf-proxy config file. Judging by the name this is the
// fallback when the filter config gives no path - its use is not visible in
// this part of the file, TODO confirm.
const char *cfconfigdefault = "/etc/cf-proxy/cproxy.cfg";
// Debug mode flags
// Can be set in the config, or in the url
// The values are distinct bits; they are OR'ed together into one int and
// tested with '&' (see Handle::debug).
const int debug_trace = 1;  // simple trace of what happens
const int debug_verbose = 2; // More details in the trace
const int debug_nomove = 4;   // Do not pass request on, only output debugs
const int debug_keepcontent = 8; // do not force to text/plain
const int debug_dump = 16; // create a dump dir, dump files there
const int debug_cookie = 32; // analyze dump dir for cookie problems

namespace mp = metaproxy_1;
namespace yf = mp::filter;

namespace metaproxy_1 {
    namespace filter {
        // Private implementation part of the CProxy filter. Everything is
        // private; CProxy itself is a friend and is the only user.
        class CProxy::Rep { // Private, internal representation
            friend class CProxy;
            // Pick the debug flags, session number, target host and target
            // path out of the request URL and store them in the handle
            void parse_url(Handle &h, Z_HTTP_Request *hreq );
            bool referer_trick(yf::CProxy::Handle &h,
                               Z_HTTP_Request *hreq,
                               mp::Package &package );
            void make_request_headers(yf::CProxy::Handle &h,
                                Z_HTTP_Request *hreq );
            void request_cookies(yf::CProxy::Handle &h,
                                Z_HTTP_Request *hreq );
            void add_resp_cookie(yf::CProxy::Handle &h,
                                 Z_HTTP_Response *hres );
            void dump_response_cookies(yf::CProxy::Handle &h,
                                  Z_HTTP_Response *hres );
            void response_cookies(yf::CProxy::Handle &h,
                                  Z_HTTP_Response *hres );
            void postprocess_content(yf::CProxy::Handle &h,
                                 Z_HTTP_Response *hres );
            bool read_cf_session(yf::CProxy::Handle &h);
            std::string cf_sessionfilename(yf::CProxy::Handle &h,
                std::string tricksession = ""  );
                // can be used in the referer tricks to force a session
            int file_age( yf::CProxy::Handle &h,
                           std::string filename, int defaultvalue );
            bool cf_session_too_old(yf::CProxy::Handle &h,
                                    std::string sesfilename );
            bool read_mp_params(yf::CProxy::Handle &h);
            bool create_session(yf::CProxy::Handle &h);
            // Open a per-request dump file in the session's dump directory
            // (creating the directory first if debug_dump is set)
            void checkdumpfile(yf::CProxy::Handle &h);
            // Put an HTML error response with the given HTTP code and
            // message into the package
            void error_page( mp::Package &package,
                    Z_HTTP_Request *hreq,
                    yf::CProxy::Handle &h,
                    int code,
                    const char *msg );
            // Encode a request or response and send it to the debug output,
            // preceded by msg
            void dump_gdu( Z_GDU *zgdu,
                    yf::CProxy::Handle &h,
                    std::string msg );
            void read_cf_config();
            void cleansessionfiles(int debug);
            // Analyze the session's _cookietrace file for cookie problems
            void check_cookietrace(yf::CProxy::Handle &h);
        private:

            // Debug flag
            int debug;
            // The following come from the filter config
            int sessionmaxage; // how many minutes the session may live
            std::string cfconfig; // Path to the cfengine proxy config file
            // The following will be loaded from the cf config file
            std::string proxyhostname; // hpxy.indexdata.com/XXX/node102
            std::string sessiondir;
            std::string cfengine;
            // The following will be extracted from the configs above
            std::string proxyhost; // hpxy.indexdata.com
            std::string proxyprefix; // xxx/node102
            std::vector< std::string> disableposttrick; // list of regexp strings.
                         // If any match, the host-relative POST request trick
                         // is not applied. See CPXY-89
        };
    }
}

class yf::CProxy::Handle {  // handle for a single HTTP request
  friend class CProxy;
private:
    // One cookie, parsed from a Set-Cookie header or a session file line
    class Cookie {
        friend class yf::CProxy::Rep;
    public:
        std::string name;
        std::string value;
        std::string domain;
        std::string path;
        std::string expires;
        std::string secure;
        std::string dbg; // where it comes from, when analyzing
        int seen; // used in cookie analysis to mark what we have seen
                  // (-1 initially, later the number of the request it was
                  // last seen in)
        // Human-readable one-line description, for the debug output
        std::string to_string () const; 
        // Parse the cookie from a line. The defaults are used for the domain
        // and path when the line does not specify them.
        Cookie(std::string line,std::string defaulthost="", std::string defaultpath="");
        // True if the cookie's domain and path fit the given target
        bool match( std::string thost, std::string tpath);
    };
    // Plain data holder for one custom replacement rule. How the fields are
    // applied is not visible in this part of the file.
    class customreplace {
        friend class yf::CProxy::Rep;
    public:
        std::string pattern;
        std::string replacement;
        std::string options;
        std::string content_type;
    };
    int debug; // see bit masks above.
    bool isfirst; // tells if this is the first request in session (__start)
    std::string debugbuf;  // All debug output
    std::string cookiebuf; // summary of cookies coming and going, for debug
    mp::odr odr;

    // The following come from the URL
    std::string session;
    std::string sesfilename; // cached here, used so often
    std::string targethost;
    std::string targetpath;
    // The following come from the cf session file
    std::string referer;
    std::string username;
    std::string password;
    std::string proxyip;
    std::vector<Cookie> cookies;
    std::vector<customreplace> customreplaces;
    
    // Stuff from the parameter file, for the content connector
    std::string c_connector; // "gale_aone_sru-content"
    std::string c_user; // auth
    std::string c_password; // auth
    std::string c_proxy; // "140.234.8.176:80"
    std::string c_realm;  // "test"

    // The dump file
    int dumpfile; // have to use an old-fashioned file handle here, only it
                  // can be opened O_EXCL
                  // -1 means not opened yet
                  // -2 means could not open (the most common case, actually)
    // constructor
    Handle( int dbg );
    // Replace the debug bit mask, and log the new value
    void setdebug( int dbg );
    // Debug messages - into yazlog, debug buffer, etc
    // db: logged if 'condition' is non-zero; the default -1 means "use the
    // debug flag of this handle". Always copied to the dump file, if open.
    void db( std::string msg, int condition = -1 );
    // db2: like db, but only logged when debug_verbose is set
    void db2( std::string msg );
    // cookiedb: like db, and also appended to cookiebuf
    void cookiedb(std::string msg );
    // Append cookiebuf to the _cookietrace file in the dump directory
    void savecookietrace();
    // Close the dump file, if open, and reset dumpfile to -1
    void closedumpfile();
};


// Construct the filter, handing a newly allocated private Rep to m_p.
yf::CProxy::CProxy() : m_p(new Rep)
{
}

// Nothing is released explicitly here.
// NOTE(review): this relies on m_p (declared in filter_cproxy.hpp, not
// visible here) being a smart pointer that deletes the Rep - confirm.
yf::CProxy::~CProxy()
{
}


// Set up the handle for one request: remember the debug bit mask, assume
// this is not the first request of the session, and mark the dump file as
// not opened. The string members start out empty.
yf::CProxy::Handle::Handle(int dbg )
    : debug(dbg),
      isfirst(false),
      dumpfile(-1) // not opened (yet?)
{
}

//////////////
// Helper functions and classes

// Little helper for dumping numbers: returns the decimal text of i
// (with a leading '-' for negative values).
// The formatting is bounded by the buffer size (snprintf), so it can not
// write past buf no matter how wide an int is on the platform.
std::string itoa( int i)
{
    char buf[64];
    snprintf(buf, sizeof(buf), "%d", i);
    return std::string(buf);
}


// Small helper to extract a substring by a regular expression
// The expression must have exactly one pair of parentheses
// Returns the default value if no match, and that defaults to empty
static std::string extract( std::string str,
           const char *regexp, std::string defval="")
{
    std::string result("");
    boost::regex re( regexp, boost::regex::perl|boost::regex::icase);
    boost::cmatch matches;
    if ( boost::regex_search(str.c_str(), matches, re) )
        result = std::string ( matches[1].first, matches[1].second );
    else 
        result = defval;
    return result;
}

//// Cookie class

// Describe the cookie on one line, for debug output. Name and value are
// always shown; the other attributes only when they are set.
std::string yf::CProxy::Handle::Cookie::to_string () const {
    std::string text = "name='";
    text += name;
    text += "' value='";
    text += value;
    text += "'";
    if ( ! domain.empty() )
        text += " domain='" + domain + "'";
    if ( ! path.empty() )
        text += " path='" + path + "'";
    if ( ! expires.empty() )
        text += " expires='" + expires + "'";
    if ( ! secure.empty() )
        text += " 'secure'"; // only the fact, not the attribute text
    if ( ! dbg.empty() )
        text += " dbg='" + dbg + "'";
    return text;
}

// Tell if this cookie applies to a given target domain and path.
//  - A cookie domain, if set, must be a (case-insensitive) suffix of tdomain
//  - A cookie path, if set and not just "/", must be a (case-insensitive)
//    prefix of tpath. The leading slash of the cookie path is ignored in
//    the comparison.
// Every decision is written to yazlog.
bool yf::CProxy::Handle::Cookie::match( std::string tdomain, std::string tpath)
{
    const bool domain_ok =
        domain.empty() || boost::algorithm::iends_with(tdomain, domain);
    if ( ! domain_ok )
    {
        yaz_log(YLOG_LOG,"[cproxy] cookie domain mismatch '%s' '%s' %s=%s",
                tdomain.c_str(), domain.c_str(),
                name.c_str(), value.c_str() );  // ###
        return false;
    }

    const bool path_matters = !( path.empty() || path == "/" );
    if ( path_matters )
    {
        // compare without the leading /
        const std::string wanted = ( path[0] == '/' ) ? path.substr(1) : path;
        if ( ! boost::algorithm::istarts_with( tpath, wanted ) )
        {
            yaz_log(YLOG_LOG,"[cproxy] cookie path mismatch '%s' '%s' %s=%s",
                    tpath.c_str(), path.c_str(),
                    name.c_str(), value.c_str()  );  // ###
            return false;
        }
    }

    yaz_log(YLOG_LOG,"[cproxy] cookie match '%s' '%s'  '%s' '%s'  %s=%s",
            tdomain.c_str(), domain.c_str(),
            tpath.c_str(), path.c_str(),
            name.c_str(), value.c_str() );  // ###
    return true;
}

// Build a cookie from a string (Set-Cookie header or session line).
// The first name=value pair gives the name and value; Domain, Path, Expires
// and Secure are looked up in what follows it. A missing Domain or Path
// falls back to defaulthost / defaultpath. If the line has no name=value
// pair at all, every string member stays empty.
yf::CProxy::Handle::Cookie::Cookie ( std::string line,
        std::string defaulthost, std::string defaultpath )
    : seen(-1)
{
    // the (first) name-value pair, and the rest
    const boost::regex pair_and_rest( "([^ =#;]+)=([^;]*)(.*)$" );
    boost::cmatch parts;
    if ( ! boost::regex_search(line.c_str(), parts, pair_and_rest) )
        return;

    name.assign( parts[1].first, parts[1].second );
    value.assign( parts[2].first, parts[2].second );
    const std::string attrs( parts[3].first, parts[3].second );
    domain  = extract(attrs, "[ ;]Domain=([^;]+)", defaulthost );
    path    = extract(attrs, "[ ;]Path=([^;]+)", defaultpath );
    expires = extract(attrs, "[ ;]Expires=([^;]+)" );
    secure  = extract(attrs, "[ ;](Secure)" );
}

//////////////////
// Error handling 


// Produce an error page.
// Creates an HTTP response with the given status code, whose body is a
// small HTML page showing the code and the message, and stores it as the
// response of the package. When any debug flag is set, the debug buffer
// collected so far is appended to the page. The error is also logged, and
// noted in the cookie trace.
void yf::CProxy::Rep::error_page( mp::Package &package,
                                  Z_HTTP_Request *hreq,
                                  yf::CProxy::Handle &h,
                                  int code,
                                  const char* msg )
{
    const std::string codetext = itoa(code);
    const std::string message(msg);

    Z_GDU *gdu_res = h.odr.create_HTTP_Response( package.session(), hreq, code);
    Z_HTTP_Response *hres = gdu_res->u.HTTP_Response;

    // Log first, so that the error itself shows up in the debug buffer
    // that may get appended below
    h.db("Error " + codetext + " " + message );

    std::string content = "<h1>Error " + codetext + "</h1>\n" + message + "\n";
    if ( h.debug )
        content += "<p/>\n<pre>\n" + h.debugbuf + "</pre>\n";

    hres->content_buf = odr_strdup(h.odr, content.c_str());
    hres->content_len = strlen(hres->content_buf);
    package.response() = gdu_res;

    h.cookiedb ("Returning error page " + codetext + " " + message );
    h.savecookietrace();
}

/////////////
// Debug stuff
// Simple debug trace.
// The message goes into the debug buffer and yazlog when 'condition' is
// non-zero. Normally called without the condition; it then defaults to the
// debug flag of the handle.
// Independently of that, the message is written to the dump file when one
// is open.
void yf::CProxy::Handle::db(std::string msg, int condition  ) {

    const int active = ( condition == -1 ) ? debug : condition;
    if ( active )
    {
        debugbuf += msg;
        // When we keep the (html) content type, add a visible newline
        // before the regular one
        debugbuf += ( debug & debug_keepcontent ) ? " <br/>\n" : "\n";
        yaz_log(YLOG_LOG, "[cproxy] %s", msg.c_str() );
    }

    // Write to the dump file, even if no debug flag set
    // That means that the dump files are always verbose. So be it.
    if ( dumpfile > -1 )
    {
        const std::string record = msg + "\n";
        // only one write, less risk of concurrency problems
        write( dumpfile, record.c_str(), record.length() );
    }
}

// More verbose debug trace: like db(), but the message is logged only when
// the debug_verbose bit is set in the debug flag.
void yf::CProxy::Handle::db2(std::string msg ) 
{
    const int verbose = debug & debug_verbose;
    db( msg, verbose );
}

// Cookie debugging: the message gets the normal db() treatment, and is also
// collected (one line per message) in the cookiebuf, which
// savecookietrace() later writes to the cookie trace file.
void yf::CProxy::Handle::cookiedb(std::string msg )
{
    db(msg);
    cookiebuf.append(msg);
    cookiebuf.append("\n");
}

// Save the cookiebuf in the cookie trace file, if we are dumping
void yf::CProxy::Handle::savecookietrace()
{
    if ( dumpfile > -1 )
    {
        std::string dumpdir = sesfilename;  // cached, like /tmp/cf.17
        if ( dumpdir.empty() ) 
            return; // Did not have a session, or something else went wrong
        dumpdir += ".dump/";
        std::string tracefilepath = dumpdir + "_cookietrace";
        std::ofstream outfile(tracefilepath.c_str(), std::ios_base::app);
        outfile << cookiebuf ;
        outfile.close();
        db2("Wrote trace file entry in " + tracefilepath );
    } else
        db2("Not saving the cookietrace, no dumpfile");
    
} // savecookietrace


// Read the cookie trace and find problematic cookies
// The code is a bit messy, as it builds a cookie warning segment,
// a cookie trace segment, and makes log entries while doing all that
// In the end the segments are logged too, but the log may be a tad
// confusing to read... The output segments are supposed to work better
// on a browser.
//
// The trace file is <sesfilename>.dump/_cookietrace. It is read line by line:
//  - "====" starts a new request; the next line holds its (proxified) URL,
//    from which the target host and path are extracted
//  - "Original request cookie line..." lists the cookies the browser sent.
//    Each is compared against the jar, newest jar entry first
//  - "Set-Cookie..." adds a cookie to the jar
//  - a few other known lines are copied into the trace as they are
// The jar starts with the cookies of the session (h.cookies).
// Sets debug_nomove in the handle when done. Returns early, doing nothing
// else, if the session file name is not known.
void yf::CProxy::Rep::check_cookietrace(yf::CProxy::Handle &h)
{
    typedef std::vector<yf::CProxy::Handle::Cookie> CookieJar;

    h.db2("Starting to analyze cookies");
    std::string dumpdir = h.sesfilename;  // cached, like /tmp/cf.17
    if ( dumpdir.empty() )
        return; // Did not have a session, or something else went wrong
    dumpdir += ".dump/";
    std::string tracefilepath = dumpdir + "_cookietrace";
    h.db2("Analyzing cookies from " + tracefilepath );
    std::string t; // cookie trace analysis
    std::string w; // cookie warnings, if any
    std::ifstream cfile( tracefilepath.c_str() );
    CookieJar jar;
    if ( ! cfile.is_open() )
        h.db("OOPS - Could not read cookie file " + tracefilepath +
            " - Did you check the page with cproxydebug-dump first?");
    else {
        CookieJar::iterator ci;
        for(ci = h.cookies.begin(); ci != h.cookies.end(); ++ci)
        {
            t += "Session cookie " + ci->to_string() + "\n";  
            ci->dbg = "session";
            jar.push_back(*ci);
        }
        if ( t.empty() )
            t = "No cookies from the session\n";
        std::string line;
        std::string reqhost; // the host and path of each request seen
        std::string reqpath; // in the log, unproxified
        int reqno = 0;
        // Loop on the result of the read itself, so that a failed read
        // never gets processed as if it were a line
        while ( getline (cfile,line) )
        {
            if ( line == "====" )
            {
                if ( ! getline (cfile,line) )
                    break; // separator was the last line, no request follows
                t += itoa(++reqno) + ": " + line + "\n";
                // extract domain and path
                std::string regex ( "(http://" + proxyhost + ")?" +
                        "/" + proxyprefix + "/\\d+/([^/]+)/?(.*)$" );
                boost::regex re( regex, boost::regex::perl|boost::regex::icase);
                boost::cmatch matches;
                line = boost::regex_replace(line, // remove debug flag
                    boost::regex( "/cproxydebug(-[^/]+)?/" ),
                    "/", boost::format_first_only);
                if (!boost::regex_search(line.c_str(), matches, re))
                {
                    h.db("Could not parse host/path out of cookietrace line " +
                       line ); // Should not happen
                    h.db2("re=" + regex);
                    reqhost = "??";
                    reqpath = "??";
                    continue;
                }
                reqhost = std::string ( matches[2].first, matches[2].second );
                reqpath = std::string ( matches[3].first, matches[3].second );
                h.db2("Host='" + reqhost + "' path='" + reqpath ); // ###
            } // new request
            else if ( line.compare(0,28,"Original request cookie line") == 0 ) {
                t += "   " + line + "\n";
                // Loop through the request cookies,
                // and note those that do not come from the jar
                // List all cookies in the process. Move the wrong-
                // value check into this loop.
                boost::regex regex( "[^'\" ;=]+=[^'\";]*[; ]*" );
                  // Quotes in cookie values may give false alarms
                boost::sregex_token_iterator iter(line.begin(), line.end(), regex, 0);
                boost::sregex_token_iterator end;
                for( ; iter != end; ++iter )
                {
                    //h.cookiebuf += "  " + *iter + "\n";
                    yf::CProxy::Handle::Cookie rq(*iter);
                    t += "     " + rq.to_string() + "\n";
                    bool okseen = false;
                    // Find the cookie in the jar, newest entry first.
                    // Reverse iterators walk the jar backwards without ever
                    // stepping an iterator in front of begin(), and do the
                    // right thing (nothing) for an empty jar.
                    for( CookieJar::reverse_iterator rci = jar.rbegin();
                         rci != jar.rend(); ++rci )
                    {
                        if ( rci->name == rq.name )
                        {  // Ok, a matching line
                            rci->seen = reqno; // we have seen it
                            // Three cases:
                            if (rci->match(reqhost,reqpath) )
                            {
                                if ( rci->value == rq.value )
                                { // 1: value and dom/path match. ok
                                    t += "       OK: " +
                                        rci->to_string() + "\n";
                                    okseen = true;
                                }   
                                else
                                { // 2: value mismatch, path matches. no good!
                                    if ( okseen)  // but we saw an OK already
                                        t += "       Hmmm! Bad Value. "
                                          "Expected: " + rci->to_string()
                                           + "\n";
                                    else { // not seen a OK, a real OOPS
                                        t += "       OOPS! Bad Value. "
                                          "Expected: " +  rci->to_string()
                                          + "\n";
                                        w += "#" + itoa(reqno) + ": " +
                                        "Bad cookie value. " +
                                        "Got '" + rq.value + "' " +
                                        "but expected " +
                                        rci->to_string() + "\n";
                                    }
                                }
                            } // match
                            else 
                            { // 3: dom/path mismatch. Should not be there
                              // at all, but may well be harmless
                                t += "       dom/path mismatch. " 
                                     "Cookie should not be there at all: " +
                                     rci->to_string() + "\n";
                                w += "#" + itoa(reqno) + ": " 
                                     "Unexpected cookie  " +
                                     "dom='" + reqhost + "' " +
                                     "path='" + reqpath + "' " +
                                     "\n       " +
                                     rci->to_string() + "\n";
                            }
                        }
                    } // jar loop
                }
                for(ci = jar.begin(); ci != jar.end(); ++ci)
                { // Check all cookies in the jar, if we missed one
                    if (ci->match(reqhost,reqpath) && ci->seen != reqno )
                    { 
                        std::string warn = "Missing cookie! Expected " +
                           ci->to_string();
                        w += "#" + itoa(reqno) + ": " + warn + "\n";
                        t += "   OOPS! " + warn + "\n";
                        h.db2("OOPS! " + warn);
                    }
                } // jar loop
            }
            else if ( line.compare(0,15,"= HTTP response") == 0  ||
                      line.compare(0,9,"Location:") == 0 ||
                      line.compare(0,16,"Faking a referer") == 0 ||
                      line.compare(0,20,"Referer-header trick") == 0 ||
                      line.compare(0,25,"request, can not redirect") == 0 )
                t += " " + line + "\n";
            else if ( line.compare(0,20,"Added cproxy session") == 0 )
                t += "   " + line + "\n";
            else if ( line.compare(0,10,"Set-Cookie") == 0 )
            { 
                h.db2( line );
                line = boost::regex_replace(line, boost::regex("'"), "");
                
                // Cookies without domain/path belong to the request
                // that set them
                yf::CProxy::Handle::Cookie c(line, reqhost, reqpath);
                c.dbg = "#" + itoa(reqno);
                jar.push_back(c);
                t += "   " + line + "\n";
            }
            // See what cookies should have been there
            // Compare to request cookies
        }
    }
    // Dump the cookie jar in the trace
    CookieJar::iterator ci;
    t += "All cookies:\n";
    for(ci = jar.begin(); ci != jar.end(); ++ci)
    {
        t += "  " + ci->to_string() + "\n";  
    }
    
    // Put the warnings and trace in the beginning of the debug buf
    if ( ! w.empty() )
        w = "\n=== Cookie warnings === \n" + w;
    else if ( ! t.empty() )
        w = "\n=== (No cookie warnings) ===\n";
    if ( ! t.empty() )
        t = "\n=== Cookie trace === \n" + t;
    else
        t = "\n=== (No cookie trace) ===\n";
    h.db(w);
    h.db(t);
    h.debug |= debug_nomove; // no point in moving the package
      // we are interested in the cookie analysis, and not the response
} //check_cookietrace

// Dump a request or response
// The GDU is encoded into a temporary ODR memory buffer, and the encoded
// text is passed to the debug output, preceded by msg on a line of its own.
// If the encoding fails, that is noted in the output, and whatever the
// encoder did produce (possibly nothing) is still dumped.
void yf::CProxy::Rep::dump_gdu( Z_GDU *zgdu,
                                yf::CProxy::Handle &h,
                                std::string msg )
{
    int len = 0;
    ODR enc = odr_createmem(ODR_ENCODE);
    const int encoded = z_GDU(enc, &zgdu, 0, 0);
    char *buf = odr_getbuf(enc, &len, 0);
    std::string text = "\n" + msg + "\n";
    if ( ! encoded )
        text += "(could not encode the GDU completely)\n";
    if ( buf && len > 0 ) // never build a string from a null buffer
        text.append( buf, len );
    h.db( text );
    odr_destroy(enc);
}

// Set the debug level (the bit mask of debug_* flags), and log the new
// value. The log line goes through db(), so it is only seen if the new
// level is non-zero.
void yf::CProxy::Handle::setdebug(int dbg )
{
    char leveltext[32];
    sprintf(leveltext, "%d", dbg);
    debug = dbg;
    db(std::string("Using debug level ") + leveltext );
}

// Check if we have a dump directory, or if we want one (debug_dump)
// and open a dump file there
//
// The dump directory is <session file name>.dump/ . It is created here only
// when debug_dump is set; otherwise it has to exist already, or no dump
// file is opened. The dump file is named after the target host and path
// (query string cut off, slashes turned into underscores). If that name is
// taken, a serial number is appended, trying up to 19 of them.
//
// On return h.dumpfile is
//   >= 0  an open file descriptor. Debug output collected so far has been
//         written to it, and a __start symlink pointing to the file has
//         been attempted (the first file of the session wins; that sets
//         h.isfirst)
//   -1    all the names tried were taken
//   -2    the file could not be created at all (most commonly because
//         there is no dump directory)
// Does nothing if a dump file is already open, or if there is no session
// file name.
void yf::CProxy::Rep::checkdumpfile(yf::CProxy::Handle &h)
{
    if ( h.dumpfile > -1 )
    {
        h.db2("dump file already open");
        return;
    }
    std::string dumpdir = cf_sessionfilename(h);  // /tmp/cf.17
    if ( dumpdir.empty() ) {
        return; // Did not have a session, or something else went wrong
    }
    dumpdir += ".dump/";
    if ( h.debug & debug_dump )
    {
        if ( mkdir( dumpdir.c_str(), 0755 ) == 0 )
        {
            h.db("Created a dump directory " + dumpdir );
        }
        else
        {
            int err = errno;  
            if ( err == EEXIST )
            {
                h.db2("Dump dir " + dumpdir + " exists already");
            }
            else
            {
                h.db("Failed to create a dump dir " + dumpdir + ": " +
                    boost::lexical_cast<std::string>(err) + " " +
                    std::string( strerror(err) ) );
            }
        }
    }
    std::string name = h.targethost + "_";
    std::string mypath = h.targetpath;
    std::string::size_type end = mypath.find_first_of('?');
    if ( end != std::string::npos )
    {
        mypath.erase(end);
    }
    std::replace( mypath.begin(), mypath.end(), '/', '_');
    name += mypath;
    int nextserial = 1;
    std::string dumpfilename = name;
    h.dumpfile = -1;
    while ( (h.dumpfile == -1) && (nextserial < 20) )
    {  // defensive coding, never loops more than 20 tries
        std::string dumpfilepath = dumpdir + dumpfilename;
        h.dumpfile = open( dumpfilepath.c_str(),
                           O_CREAT | O_EXCL | O_WRONLY,   // create, fail if exists
                           S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH  );
        if ( h.dumpfile == -1 )
        { // some sort of error
            int err = errno;
            h.db2("Creating " + dumpfilepath +" failed with " +
                boost::lexical_cast<std::string>(err) + " " +
                std::string( strerror(err) ) );
            if ( err == EEXIST )
            {
                h.db("Dump file exists, trying higher number: " + dumpfilename);
                dumpfilename = name + "." +
                    boost::lexical_cast<std::string>(nextserial);
                nextserial++;
            }
            else
            {
                if ( err == ENOENT )
                {
                    h.db2("ENOENT - no such file, probably no dump dir");
                }
                // Any other error (no permission, name too long, ...) will
                // not go away by asking again with the same name. Give up,
                // so the loop is sure to end.
                h.dumpfile = -2; // don't try again
            }
        }
        else
        {
            h.db("Created dump file " + dumpfilepath );
            if ( ! h.debugbuf.empty() )
            { // dump what we have collected debug so far
                write( h.dumpfile, h.debugbuf.c_str(), h.debugbuf.length() );
            }
            // Create a symlink __start to point to the file
            // Only works if symlink doesn't exist yet, so the first one wins
            // Thus __start will point to the first file in the session, the one
            // we need to start from when following redirects
            std::string startname = cf_sessionfilename(h);  // /tmp/cf.17
            startname += ".dump/__start";
            if ( symlink( dumpfilename.c_str(), startname.c_str() ) == 0 )
            {
                h.db("Created symlink " + startname + " to point to " + dumpfilename );
                h.isfirst = true;
            }
            else
            {
                int err = errno;
                h.db("Failed to create the __start symlink. errno= " +
                  boost::lexical_cast<std::string>(err) + ": " +
                  std::string( strerror(err) )
                );
            }
        } // created dumpfile ok
    } // try higher number loop
} // checkdumpfile

// Close the dump file, if one is open. In any case dumpfile is -1 ("not
// opened") afterwards, also if it was -2 ("could not open") before.
// close() returns 0 on success and -1 on failure; only a failure is logged.
void yf::CProxy::Handle::closedumpfile()
{
    if ( dumpfile > -1 )
    {
        const int fd = dumpfile;
        // Forget the descriptor before logging anything: db() writes to the
        // dump file whenever dumpfile looks open
        dumpfile = -1;
        if ( close(fd) != 0 ) {
            int err = errno;
            db("Failed to close the dumpfile. errno = " +
                  boost::lexical_cast<std::string>(err) + ": " +
                  std::string( strerror(err) ) );
        } // error in closing it
    }
    dumpfile = -1; // in any case
} // closedumpfile

////////////////
// Session stuff

// Parse the URL path. Sets the following in the handle
//  - debug
//  - session
//  - targethost
//  - targetpath
// The url is typically in the form
//    http://pxy.id.com/pre/fix/9999/host.domain.com/path/?params
// or more likely
//    /pre/fix/9999/host.domain.com/path/?params
// The debug flag can be set by embedding /cproxydebug/ in the url, or
// /cproxydebug-n/ where n is the debug level, or cproxydebug-flag,
// where flag is one or more of the following:
//  - verbose
//  - nomove
//  - keepcontent
//  - dump
//  - cookie
// Plain /cproxydebug/ means level 1, and the named flags always include
// level 1 too. The debug part is removed from the path of the request.
// When a session is found, the dump file is set up as well.
// In case of errors, does not set the session etc.
void yf::CProxy::Rep::parse_url(yf::CProxy::Handle &h,
                                Z_HTTP_Request *hreq )
{
    // The words recognized after /cproxydebug, and the bit each one sets
    struct FlagWord { const char *word; int bit; };
    static const FlagWord flagwords[] = {
        { "-verbose",     debug_verbose },
        { "-nomove",      debug_nomove },
        { "-keepcontent", debug_keepcontent },
        { "-dump",        debug_dump },
        { "-cookie",      debug_cookie }
    };
    const size_t nflagwords = sizeof(flagwords) / sizeof(flagwords[0]);

    std::string url(hreq->path);
    h.db2("Parsing url " + url );

    // Step 1: the debug flag
    const boost::regex debugpart( "/cproxydebug(-[^/]+)?/" );
    boost::cmatch found;
    if ( ! boost::regex_search(url.c_str(), found, debugpart) )
    {
        h.db2("No cproxydebug settings in the url");
        // yes, this makes sense, the debug may already be set in the config file.
    }
    else
    {
        // Take a copy of the level before touching the url it points into
        const std::string level( found[1].first, found[1].second );
        url = boost::regex_replace(url, debugpart, "/", boost::format_first_only);
        hreq->path = odr_strdup(h.odr, url.c_str() );

        const int numeric = level.empty() ? 0 : atoi(level.c_str());
        if ( level.empty() )
        {
            h.setdebug(1);
        }
        else if ( numeric != 0 )
        {
            h.setdebug(-numeric); // the hyphen made it negative
        }
        else
        {
            int flags = 1;
            for ( size_t i = 0; i < nflagwords; ++i )
            {
                if ( strstr(level.c_str(), flagwords[i].word) )
                    flags |= flagwords[i].bit;
            }
            h.setdebug(flags);
        }
        h.db2("Found a cproxydebug setting. Url after removing: " + url);
    }

    // Step 2: session, host and path
    // The url may be just the path (relative to the HOST header,
    // or a full url with http:// and the host
    const std::string sessionpattern = "^(http://" + proxyhost + ")?" +
            "/" + proxyprefix + "/(\\d+)/([^/]+)/?(.*)$";
    // Using a case-INsensitive matching, because we have seen javascript
    // that lowercases whole URLs, which may mess with an uppercase
    // prefix. See CPXY-64
    const boost::regex sessionpart( sessionpattern,
                                    boost::regex::perl|boost::regex::icase );
    if ( ! boost::regex_search(url.c_str(), found, sessionpart) )
    {
        h.db("No session found in the URL " + url );
        return;
    }
    h.session.assign( found[2].first, found[2].second );
    h.targethost.assign( found[3].first, found[3].second );
    h.targetpath.assign( found[4].first, found[4].second );
    h.db("Got session '" + h.session + "' "
          "host '" + h.targethost + "' "
          "path '" + h.targetpath +  "'" +
          "method '" + hreq->method + "'" );
    checkdumpfile(h);
} // parse_url

// Referer-trick: If we have a referer-header that looks like it would
// be proxified, extract the session from it, build a new proxified URL,
// and redirect there. Can not use the session directly, because we may
// meet a link on that page that does not have a session prefix in it,
// we would then not have a referer header to fall back on.
// Except when we have a request that is not GET, then we have to use
// the request directly, since those can not be redirected, the browser
// will send a GET request to the new address, without parameters.
// Returns true in case it could do a redirect (which is set in the
// package, ready to go), or false if not. May have faked session values
// in the handle if we should use this request directly.
// The user looks at a (proxified) url
//   http://pxy.id.com/prefix/9999/host.com/page.html
// It has a link that for some reason was not proxified, host-relative:
//   href="/other.html"
// The browser prepends the current host, as it should, and tries to get
//   http://pxy.id.com/other.html
// It also adds a referer header
//   http://pxy.id.com/prefix/9999/host.com/page.html
// When we get the request, we can obviously not find a session in it.
// So we try to extract the necessary details from the referer, and redirect to
//   http://pxy.id.com/prefix/9999/host.com/other.html
// Due to the way Metaproxy does things, it is possible that the path in the
// request is an absolute path
//   http://pxy.id.com/other.html
// instead of the usual case of plain
//   /other.html
//
// A further fallback is to look at a cproxysession cookie, take the session
// number from there, and redirect as before.
//
// As noted above, only GET requests can be redirected (CPXY-81)

// Parameters:
//   h       - request handle; receives debug output, and for non-GET
//             requests the faked session and targethost
//   hreq    - the incoming request (its path is rewritten for non-GET)
//   package - receives the redirect response when we return true
// Returns true only when a redirect response has been placed in the package.
bool yf::CProxy::Rep::referer_trick(yf::CProxy::Handle &h,
                                    Z_HTTP_Request *hreq,
                                    mp::Package &package )
{
    const char *ref = z_HTTP_header_lookup(hreq->headers, "Referer");

    if ( !ref )
    {
        h.db2("No referer header at all");
        ref = ""; // empty string, not null. Will fail to match below
    }
    h.db2("Looking at referer header " + std::string(ref) );
    std::string regex ( "^(http://" + proxyhost + "/" + proxyprefix + ")/(\\d+)/"
                        "([^/]+)(/.*)?" );
    boost::regex re2( regex, boost::regex::perl );
    boost::cmatch matches;
    // Declared at function scope on purpose: 'matches' may end up pointing
    // into this string, and is used after the block below.
    std::string fakereferer;
    if ( ! boost::regex_search(ref, matches, re2))
    {
        h.db2("Could not match referer header, not trying referer trick");
        h.db2("Header: " + std::string(ref) );
        h.db2("Regex: " + regex );
        // try to build a referer-line from the cproxysession cookie
        const char *cookieline =  z_HTTP_header_lookup(hreq->headers, "Cookie");
        if ( ! cookieline )
        {
            h.db("Could not get a cookieline to fall back on");
            return false;
        }
        std::string cookieregex ( "cproxysession=([^/]+)/(\\d+)");
        boost::regex cookiere( cookieregex, boost::regex::perl );
        boost::cmatch cookiematches;
        if ( boost::regex_search( cookieline, cookiematches, cookiere) )
        {
            std::string cookiehost( cookiematches[1].first, cookiematches[1].second);
            std::string cookiesession( cookiematches[2].first, cookiematches[2].second);
            fakereferer = "http://" + proxyhost + "/" + proxyprefix + "/" +
                cookiesession + "/" + cookiehost + "/";
            h.cookiedb("Faking a referer from the cookie: '" + fakereferer + "'" );
            if ( ! boost::regex_search(fakereferer.c_str(), matches, re2))
            {
                h.db("Got a fake referer, but could not match from it");
                return false;
            }

        }
        else
        {
            h.db("Got a cookieline, but could not match it: '" +
                std::string(cookieline) + "' re=" + cookieregex);
            return false;
        }

    }


    std::string refbegin( matches[1].first, matches[1].second );
    std::string refsess( matches[2].first, matches[2].second );
    std::string refhost( matches[3].first, matches[3].second );
    std::string refrest( matches[4].first, matches[4].second );
    // Generate (and cache) a session file name from the refsess,
    // and try to open a dump file, so we get dumps also for
    // redirected requests
    (void) cf_sessionfilename( h, refsess );
    checkdumpfile(h);

    // Set up a proper response. Need to dig out the params from the headers,
    // parsing the URL failed, since we ended here.
    // The Host header may be missing; the lookup then returns a null
    // pointer, which must never be handed to the std::string constructor.
    const char *hosthdr = z_HTTP_header_lookup(hreq->headers, "Host");
    std::string host( hosthdr ? hosthdr : "" );
    std::string path( hreq->path );
    h.db2("redirect: method='" + std::string(hreq->method) + "' " +
          "path='" + std::string(hreq->path) + "' "  );
    h.db2("redirect: refbegin='" + refbegin + "' refsess='" + refsess + "'");
    h.db2("redirect: refhost='" + refhost + "' refrest='" + refrest + "'" );
    h.db2("redirect: host='" + host + "' path='" + path+ "' " );
    // The path may be an absolute URL, at least in the unit test
    // So clean the beginning of it
    boost::regex re3 ( "^(http://)?(" + proxyhost + ")?/?" );
    path = boost::regex_replace( path, re3, "" );
    h.db2("redirect: cleaned path='" + path + "'" );

    std::string url = refbegin + "/" + refsess + "/" + refhost + "/" + path ;

    if ( strcmp(hreq->method,"GET") != 0 )
    { // If not a GET request, we can not redirect, the browser will always
      // retry with a GET, losing the POST data... So we try to set up the
      // handle for direct processing (CPXY-81)

        // Special case: CPXY-89: A host-relative POST does not always work,
        // and may mess the page. So we have a list of regexps to disable the
        // trick.
        std::vector<std::string>::const_iterator sl;
        std::string urlend = refhost + "/" + path;
        for ( sl = disableposttrick.begin(); sl != disableposttrick.end(); ++sl )
        {
            boost::regex skipre( *sl, boost::regex::perl );
            if ( boost::regex_search( urlend, skipre) )
            {
               h.db("Post-trick disabled. Url " + urlend + " matches " + *sl );
               return false; // without setting session, etc. So we end up with a 400 no session
            }
        }

        h.cookiedb(std::string(hreq->method) + " request, can not redirect.  "
            "Using url as is: " + url );
        h.session = refsess;
        h.targethost = refhost;
        hreq->path = odr_strdup(h.odr,url.c_str());
        return false; // signal we didn't redirect
    } // not GET


    // Set up a 302 - temporary redirect
    h.cookiedb("Found a good referer, redirecting to " + url );
    Z_GDU *gdu_res = h.odr.create_HTTP_Response( package.session(), hreq, 302);
    Z_HTTP_Response *hres = gdu_res->u.HTTP_Response;
    z_HTTP_header_set(h.odr, &hres->headers,
                          "Location", url.c_str() );
    // Add some headers to document what we have done
    // This way the redirection gets mentioned in metaproxy logs etc
    z_HTTP_header_set(h.odr, &hres->headers,
                          "X-Cproxy-redirect-from", path.c_str() );
    z_HTTP_header_set(h.odr, &hres->headers,
                          "X-Cproxy-redirect-to", url.c_str() );
    hres->content_len = 0; // kill the error page yaz helpfully creates
    if ( h.debug )
    { // Pass the debug output in the response, even if it is a 302
      // But not if debug_keepcontent, of course
        if ( !(h.debug & debug_keepcontent) )
        {
            z_HTTP_header_set(h.odr, &hres->headers,
                            "Content-Type", "text/plain");
            hres->code = 200; // fake a OK, so we can see the debug
            h.cookiedb("Faking an OK response, since debugging. "
                 "In reality this would be a 302");
        }
        hres->content_buf = odr_strdup(h.odr, h.debugbuf.c_str());
        hres->content_len = strlen(hres->content_buf);
    }
    h.cookiedb( "Referer-header trick: Directing to " + url);
    package.response() = gdu_res;
    return true;
} // referer_trick


// Path of the session file: <sessiondir>/cf.<session>
// The session number comes from the handle, or from tricksession when the
// handle has none. The result is cached in the handle, so later calls
// return the same name regardless of the arguments.
// Returns an empty string if the session dir or the session is not known.
std::string yf::CProxy::Rep::cf_sessionfilename(yf::CProxy::Handle &h,
    std::string tricksession )
{
    if ( h.sesfilename.empty() )
    { // nothing cached yet, try to build the name
        const std::string &sesnum =
            h.session.empty() ? tricksession : h.session;
        if ( sessiondir.empty() || sesnum.empty() )
        {
            h.db("Can not make sessionfilename. sesdir='" + sessiondir + "' " +
              "ses='" + h.session + "' tses='" + tricksession + "'" );
            return ""; // can not do
        }

        std::string path = sessiondir;
        if ( path[path.length()-1] != '/' )  // sessiondir is not empty here
            path += "/";
        path += "cf.";
        path += sesnum;
        h.db2("got sessionfilename '" + path + "' from " +
               "sesdir='" + sessiondir + "' " +
               "ses='" + h.session + "' and '" + tricksession + "'" );

        h.sesfilename = path; // remember for future use
    }
    return h.sesfilename;
}

// Age of a file in seconds, counted from its modification time.
// If the file can not be stat'ed (for example, it does not exist),
// logs that and returns defaultvalue instead.
int yf::CProxy::Rep::file_age( yf::CProxy::Handle &h,
                         std::string filename, int defaultvalue )
{
    struct stat info;
    if ( stat(filename.c_str(), &info) != 0 )
    { // should not happen, except at startup, when we try to check
      // files that don't exist
        h.db2("Not able to stat " + filename + " using default file age");
        return defaultvalue;
    }
    int seconds = time(0) - info.st_mtime;
    return seconds;
}

// Tells if the session file is older than sessionmaxage (given in minutes).
// A file that can not be stat'ed counts as too old: file_age is asked to
// default to one second over the limit, so the session gets rejected.
bool yf::CProxy::Rep::cf_session_too_old(yf::CProxy::Handle &h,
                                         std::string sesfilename )
{
    int limit = sessionmaxage * 60;  // minutes to seconds
    int seconds = file_age(h, sesfilename, limit + 1);
    bool expired = ( seconds > limit );
    h.db2("Session age check: file " + sesfilename +
      " is " + itoa(seconds) + " secs old, " +
      " limit is " + itoa(limit) +
      " too_old=" + itoa(expired) );
    return expired;
}

// Read the cf session file.
// Each line is "Tag value"; anything from a '#' on is ignored. The values
// are stored in the handle: referer, username, password, proxyip, custom
// replacements, and the session cookies.
// Returns false if the file can not be opened or the session is too old,
// true otherwise. Lines that do not parse are logged and skipped.
bool yf::CProxy::Rep::read_cf_session(yf::CProxy::Handle &h)
{
    std::string filename = cf_sessionfilename(h);
    std::ifstream sesf ( filename.c_str() );
    if ( ! sesf.is_open() )
    {
        h.db("Could not open session file " + filename );
        return false;
    }
    if ( cf_session_too_old(h,filename) )
    {
        h.db("Session has expired");
        sesf.close();
        return false;
    }
    std::vector<std::string> oldcookies;
    std::vector<std::string> newcookies;
    std::string line;
    // Loop on the result of getline, so that a read error ends the loop
    // instead of spinning forever waiting for eof.
    while ( std::getline(sesf, line) )
    {
        //h.db2("sesfile line: " + line );
        // Neither token can be longer than the line itself, so buffers of
        // line length + 1 (for the terminating NUL) can never overflow,
        // no matter how long the line is.
        std::vector<char> tagbuf( line.length() + 1 );
        std::vector<char> valbuf( line.length() + 1 );
        char *tag = &tagbuf[0];
        char *val = &valbuf[0];
        int n = sscanf(line.c_str(), " %[^ #] %[^#]", tag, val);
        // TODO - This could be a tad more strict parsing, will ignore
        // some lines silently
        if ( n == 1 )
        {
            h.db2("No value for session variable " + std::string(tag) +
             " ignoring it");
        }
        else if ( n == 2 )
        {
            h.db2("Session tag '" + std::string(tag) + "' value "+
                  "'" + std::string(val) + "'");
            if ( strcmp(tag,"CfSession") == 0 )
            {
                // ignore that line, we know what the session is
            }
            else if ( strcmp(tag,"Referer") == 0 )
            {
                h.referer = val;
            }
            else if ( strcmp(tag,"Username") == 0 )
            {
                h.username = val;
            }
            else if ( strcmp(tag,"Password") == 0 )
            {
                h.password = val;
            }
            else if ( strcmp(tag,"Proxyip") == 0 )
            {
                h.proxyip = val;
            }
            else if ( strcmp(tag,"Cookie") == 0 )
            {
                oldcookies.push_back(val);
                if ( h.isfirst )
                    h.cookiedb( "old cookie line from session: " +
                        std::string(val) );
            }
            else if ( strcmp(tag,"Set-Cookie") == 0 )
            {
                newcookies.push_back(val);
                if ( h.isfirst )
                    h.cookiedb ( "new cookie line from session: " +
                        std::string(val) );
            }
            else if ( strcmp(tag,"Custom-Pattern") == 0 )
            {
                // The value holds the pattern, followed by keyword-tagged
                // replacement, and optional options and content type
                std::string regex( " *(.*?) *Custom-Replacement *(.*?)"
                                   " *(Custom-Options (.*?))?"
                                   " *(Custom-Content-Type (.*?))?"
                                   " *$");
                boost::regex re(regex);
                boost::cmatch matches;
                if (boost::regex_match(val, matches, re) ) {
                    yf::CProxy::Handle::customreplace c;
                    c.pattern = std::string( matches[1].first, matches[1].second );
                    c.replacement = std::string( matches[2].first, matches[2].second );
                    c.options = std::string( matches[4].first, matches[4].second );
                    c.content_type = std::string( matches[6].first, matches[6].second );
                    h.db2("Got custom replace p='" + c.pattern + "' "+
                        "r='" + c.replacement + "' o='" + c.options + "' " +
                        "t='" + c.content_type + "'" );
                    h.customreplaces.push_back(c);
                }
                else
                    h.db("Bad custom replace line in session file: ' " +
                      std::string(val) + "'" );
            }
            else
            {
                h.db("Unknown line in session file: " + line );
            }
        }
    }
    sesf.close();
    // build the cookies vector
    // If we have even one new cookie line, we use them (and only them)
    // else, we use the old cookie lines, or nothing at all
    // Old cf-engines write only old-style cookie lines, but new engines
    // write both. In the future, we will drop the old style lines
    if ( !newcookies.empty() )
    {
        // New-style lines are parsed by the Cookie constructor
        std::vector<std::string>::const_iterator cl;
        h.db("Processing " + boost::lexical_cast<std::string>( newcookies.size() ) +
             " lines of new cookies ");
        for ( cl = newcookies.begin(); cl != newcookies.end(); ++cl )
        {
            yf::CProxy::Handle::Cookie c(*cl);
            if ( !c.name.empty() )
            {
                h.db2("New cookie line " + c.to_string() );
                h.cookies.push_back(c);
            }
            else
            {
                // TODO - How to handle this
                h.db("Could not parse Set-Cookie line '" + *cl + "'" );
            }
        }
    }
    else if ( !oldcookies.empty() )
    {
        // Old-style lines are "domain; name=value"
        std::vector<std::string>::const_iterator cl;
        h.db("Processing " + boost::lexical_cast<std::string>( oldcookies.size() ) +
             " lines of old cookies ");
        // The pattern is the same for every line, compile it only once
        boost::regex re( "^ *([^;]*)?; *([^ =;]+)=(.*)$" );
        for ( cl = oldcookies.begin(); cl != oldcookies.end(); ++cl )
        {
            boost::cmatch matches;
            if (boost::regex_match(cl->c_str(), matches, re) )
            {
                 yf::CProxy::Handle::Cookie c("");
                 c.domain = std::string( matches[1].first, matches[1].second);
                 c.name   = std::string( matches[2].first, matches[2].second);
                 c.value  = std::string( matches[3].first, matches[3].second);
                 h.db2("Old cookie line "
                   "name='" + c.name + "' "
                   "value='" + c.value + "' "
                   "domain='" + c.domain + "'");
                 h.cookies.push_back(c);
            }
            else
            {
                // TODO - How to handle this
                h.db("Could not parse Cookie line '" + *cl + "'" );
            }
        }
    }
    else {
        if ( h.isfirst )
            h.cookiedb("No cookies from the session");
    }
    return true;
} // read_cf_session

// Read a param file
// Those are created by metaproxy, when accessing a SRU target that needs
// to use a content connector for the full-text access.
// The file is named like the session file, with ".p" appended, and has
// lines of the form "tag: value". Known tags are connector, auth
// (user/password), proxy and realm; they end up in the c_* fields of the
// handle.
// return true on success, false otherwise: no file name, file can not be
// opened, an unknown tag, or no connector in the file.
// TODO - Better error detection and reporting
bool yf::CProxy::Rep::read_mp_params(yf::CProxy::Handle &h)
{
    std::string filename = cf_sessionfilename(h);
    if ( filename.empty() )
    {
        return false; // something badly wrong
    }
    filename += ".p";
    std::ifstream paramf ( filename.c_str() );
    if ( ! paramf.is_open() )
    { // probably no such file, by far the most common case
        h.db2("Could not open parameter file " + filename );
        return false;
    }
    h.db2("Reading param file " + filename );
    std::string line;
    // Loop on the result of getline, so that a read error ends the loop
    // instead of spinning forever waiting for eof.
    while ( std::getline(paramf, line) )
    {
        //h.db2("paramfile line: " + line );
        // All the scanned pieces are parts of the line, so buffers of
        // line length + 1 can not overflow, however long the line is.
        const std::string::size_type bufsize = line.length() + 1;
        std::vector<char> tagbuf( bufsize );
        std::vector<char> valbuf( bufsize );
        char *tag = &tagbuf[0];
        char *val = &valbuf[0];
        int n = sscanf(line.c_str(), " %[^ #:]: %[^#]", tag, val);
        // TODO - This could be a tad more strict parsing, will ignore
        // some lines silently
        if ( n == 1 )
        {
            h.db2("No value for parameter " + std::string(tag) +
             " ignoring it");
        }
        else if ( n == 2 )
        {

            h.db2("Parameter tag '" + std::string(tag) + "' value "+
                  "'" + std::string(val) + "'");
            if ( strcmp(tag,"connector") == 0 )
            {
                h.c_connector = val;
            }
            else if ( strcmp(tag,"auth") == 0 )
            {
                // value is user/password; both start out as empty strings
                std::vector<char> unbuf( bufsize );
                std::vector<char> pwbuf( bufsize );
                char *un = &unbuf[0];
                char *pw = &pwbuf[0];
                n = sscanf(val, "%[^/ ]/%[^ ]",un,pw);
                if ( n > 0 )
                    h.c_user = un;
                if ( n > 1 )
                    h.c_password = pw;
            }
            else if ( strcmp(tag,"proxy") == 0 )
            {
                h.c_proxy = val;
            }
            else if ( strcmp(tag,"realm") == 0 )
            {
                h.c_realm = val;
            }
            else
            {
                h.db("Unknown line in parameter file: " + line );
                paramf.close();
                return false;
            }
        }
    }
    paramf.close();

    if ( h.c_connector.empty() )
    {
        h.db("No content connector in the param file, can't use it");
        return false;
    }
    return true;
}


// Small helper for building URL parameters.
// Gives "&param=value", or nothing at all when the value is empty.
// Neither the name nor the value is escaped in any way.
std::string urlparam( std::string param, std::string value )
{
    std::string piece;
    if ( !value.empty() )
    {
        piece += "&";
        piece += param;
        piece += "=";
        piece += value;
    }
    return piece;
}
// Create a CF session by making a SRU request to the cf engine, with the
// parameters from the param (.p) file. Used in case a SRU request to
// metaproxy needs a content connector for proxying links.
// Returns true only if the engine answered with a 200. Returns false
// without any request if no cfengine is configured or the param file
// could not be used.
bool yf::CProxy::Rep::create_session(yf::CProxy::Handle &h)
{
    if ( cfengine.empty() )
    {
        h.db2("No cfengine specified, not even looking at content connectors");
        return false;
    }
    if ( ! read_mp_params(h) )
        return false;

    // The connector parameters go before the '?', separated by commas and
    // ampersands. Should these be url-encoded?
    std::string sru = "http://";
    sru += cfengine;
    sru += "/";
    sru += h.c_connector;
    sru += ",cproxysession=";
    sru += h.session;
    sru += urlparam("realm", h.c_realm );
    sru += urlparam("user", h.c_user );
    sru += urlparam("password", h.c_password );
    sru += urlparam("proxy", h.c_proxy );
    sru += "?version=1.2";  // note the '?', not '&'. Here start real params.
    sru += "&operation=searchRetrieve";
    sru += "&x-pquery=dummy";
    sru += "&maximumRecords=0";
    h.cookiedb("Making SRU request " + sru );

    // Note, do not set the proxy in the url - we access the cf-zengine
    // always directly. The proxy is passed as a parameter to the connector,
    // which will store it in the session, and so it will be used for fetching
    // the actual content.
    yaz_url_t fetcher = yaz_url_create();
    Z_HTTP_Response *answer =
        yaz_url_exec(fetcher, sru.c_str(),
                     "GET",
                     0,0,0);   // headers, content, content_len
    bool created = false;
    if ( ! answer )
    {
        h.db("Got no HTTP response from content-connector SRU request!");
    }
    else
    {
        created = ( answer->code == 200 );
        if ( created )
            h.db("Got a good response from content-connector SRU request");
        else
            h.db("Got a bad code " +
                 boost::lexical_cast<std::string>(answer->code) +
              " from content-connector SRU request!" );
        // Log the body in both cases; must happen before the destroy below
        if ( answer->content_len )
            h.db2( std::string( answer->content_buf, answer->content_len));
    }
    yaz_url_destroy(fetcher);
    return created;
}

/////////////////////
// Request stuff

// Prepare the request headers from what the session told us:
//  - Referer, if the session has one
//  - HTTP basic auth, if the session has a username
//  - X-Metaproxy-Proxy, if the session has a proxy ip
// and drop Accept-Encoding.
void yf::CProxy::Rep::make_request_headers(yf::CProxy::Handle &h,
                                     Z_HTTP_Request *hreq )
{
    Z_HTTP_Header **hdrs = &hreq->headers;

    if ( !h.referer.empty() )
        z_HTTP_header_set(h.odr, hdrs, "Referer", h.referer.c_str() );

    if ( !h.username.empty() )
        z_HTTP_header_add_basic_auth( h.odr, hdrs,
                    h.username.c_str(), h.password.c_str());

    // This one is for the http client filter, not the rewrite filter!
    if ( !h.proxyip.empty() )
        z_HTTP_header_set(h.odr, hdrs,
                   "X-Metaproxy-Proxy", h.proxyip.c_str() );

    // We can not process gzipped data, so do not let the browser ask for it.
    // TODO - This should not be here, but in the http_rewrite module
    (void) z_HTTP_header_remove(hdrs, "Accept-Encoding" );
} // make_request_headers


// Request cookie mangling
// for each session cookie
// check if already mentioned in the request. If so, ignore the sescookie
// if not, check if domain matches. If not, ignore the sescookie
// then check the cookie path against the target path, if it has one
// If not ignoring, append to the cookie line
// replace the cookie header in the request.
// Note: We can be a bit lax in deciding which cookies to pass, because
// the users browser will be doing most of the tricky logic. The whole
// cproxy session expires soon enough that we don't have to worry about
// expiration time here.
// Note: The session cookies are of course not proxified, so we compare
// against a deproxified URL components. The resulting cookie line only
// contains name-value pairs, so there is no proxifying to worry about.
// The final cookies are also listed in h.cookiebuf, one per line.
void yf::CProxy::Rep::request_cookies(yf::CProxy::Handle &h,
                                     Z_HTTP_Request *hreq )
{

    const char *cookie_hdr = z_HTTP_header_lookup(hreq->headers, "Cookie");
    std::string cookieline;
    if ( cookie_hdr )
    {
        cookieline = cookie_hdr;
    }
    else
    {
        h.db2("No cookies from the request");
    }
    h.cookiedb("Original request cookie line '" + cookieline + "'" );
    cookieline = ";" + cookieline; // now also the first starts with semicolon

    // Loop through the cookies in reverse order. Old-form cookie lines
    // do not have a path, so we can not always make proper decision if
    // the cookie should be included. And even the new cookies may go
    // subtly wrong, since our domain/path rewriting can not be done
    // 100% correct.  Experience shows that using the
    // latest of otherwise matching cookies is usually a good idea.
    // Reverse iterators do this without ever stepping an iterator below
    // begin(), and need no special case for an empty vector.
    const std::vector<yf::CProxy::Handle::Cookie> &sescookies = h.cookies;
    std::vector<yf::CProxy::Handle::Cookie>::const_reverse_iterator ci;
    for ( ci = sescookies.rbegin(); ci != sescookies.rend(); ++ci )
    {
        h.db2("request cookie loop: " + ci->to_string() );
        if ( cookieline.find(";" + ci->name + "=") != std::string::npos ||
            cookieline.find(" " + ci->name + "=") != std::string::npos )
        {  // note, catches cookies separated by ';' or '; '
            h.cookiedb("Cookie '" + ci->name + "' already in the request, " +
            "not adding it from the session");
            continue;
        }
        // Skip if the cookie domain does not match the targethost
        // Note that the match is case-INsensitive.  see CP-3482
        // TODO - we use a plain substring match, not splitting into
        // components. Should probably be more correct here.
        if ( ! boost::algorithm::iends_with(h.targethost, ci->domain) )
        {
            h.cookiedb("Cookie '" + ci->name +
                "' has a domain '" + ci->domain + "' " +
                "which does not match '" + h.targethost + "'. " +
                "Not adding to the request");
            continue;
        }
        // Check the cookie path
        // ci->path has something like '/' or '/staff'
        // targetpath could be like 'staff' or 'staff/some/page.html'
        // Again, we use case-insensitive matching, since we want to
        // fail on the permissive side.
        if ( !ci->path.empty() && ci->path != "/" ) {
            h.db2("Cookie '" + ci->name + "' has path '" + ci->path + "' " +
              "matching agaist " + h.targetpath );
            std::string cpath( ci->path.c_str() +1); // skip the leading /
            if ( ! boost::algorithm::istarts_with( h.targetpath, cpath ) )
            {
                h.cookiedb("Cookie '" + ci->name + "' has path '" + cpath + "' "+
                   "which does not match '" + h.targetpath + "'. " +
                   "Not adding to the request" );
                continue;
            }
        }
        // The session cookie is all right, append it to the request
        std::string newcookie =  ci->name + "=" + ci->value;
        h.cookiedb("Adding session cookie " + newcookie);
        cookieline += ";" + newcookie;
    }
    // Remove the ';' we added earlier, and any other leading ones. If the
    // line has nothing but semicolons, find_first_not_of gives npos, and
    // the erase empties the string, without ever looking at an empty one.
    cookieline.erase( 0, cookieline.find_first_not_of(';') );
    h.cookiedb("Final cookie line: '" + cookieline + "'");
    if ( !cookieline.empty() )
        z_HTTP_header_set(h.odr, &hreq->headers,
                     "Cookie", cookieline.c_str() );
    // See http://tools.ietf.org/html/rfc6265 for more info about the cookie
    // headers.
    // Dump the final request cookies in the cookie buf, in a readable way
    boost::regex regex( "[^;]+[; ]*" );
    boost::sregex_token_iterator iter(cookieline.begin(), cookieline.end(), regex, 0);
    boost::sregex_token_iterator end;
    for( ; iter != end; ++iter )
        h.cookiebuf += "  " + *iter + "\n";
}

//////////////////
// Postprocessing the response

// Add a Set-Cookie header with the cproxy session cookie to the response
//   cproxysession=www.indexdata.com/200
// That is, target host name, and the session number
// TODO - It might make sense to check if the request already has this
// cookie, and in that case not set it in the response. The browser has
// it, so it should not be necessary to add it. And our policy is to make
// minimal changes.  Remember to compare the value too, or we may get
// stuck with some other session!
void yf::CProxy::Rep::add_resp_cookie(yf::CProxy::Handle &h,
                                      Z_HTTP_Response *hres )
{
    std::string sescookie = "cproxysession=";
    sescookie += h.targethost;
    sescookie += "/";
    sescookie += h.session;
    // Path=/ makes it valid for the whole proxy domain, so we get it back
    // even with unproxified host-relative links that can have what ever
    // path
    sescookie += "; Path=/";
    z_HTTP_header_add(h.odr, &hres->headers,
                      "Set-Cookie", sescookie.c_str() );
    h.cookiedb("Added cproxy session cookie " + sescookie );
} // add_resp_cookie


// Proxify Set-Cookie lines in the response
// See CP-3416, CPXY-49, and the lengthy comment at the end of this file
// In short, every Set-Cookie header is kept as is, except that
//  * The domain, if there is one, is replaced with the proxy host
//    (without its port number)
//  * The path is replaced with
//      /proxyprefix/session/original-domain/original-path
//    where the original domain defaults to the target host, and the
//    original path to the current target path, up to any '?' or '#'
//  * If the original domain was a wildcard (starting with a dot, .foo.com)
//    the dot is dropped from the path, and a second Set-Cookie header is
//    added right after this one, valid for the whole session

void yf::CProxy::Rep::response_cookies(yf::CProxy::Handle &h,
                                      Z_HTTP_Response *hres )
{
    // None of these depend on the header at hand, so name them once
    const boost::regex finddomain(" *; *domain=([^ ;]*)", boost::regex::icase);
    const boost::regex findpath(" *; *path=([^ ;]*)", boost::regex::icase);
    // The replacements keep the spelling (upper/lowercasing) of the
    // words 'domain' and 'path', by way of the $1
    const boost::regex setdomain("(domain)=([^ ;]+)", boost::regex::icase );
    const boost::regex setpath("(path)=([^ ;]+)", boost::regex::icase );
    const std::string sessionroot = "/" + proxyprefix + "/" + h.session ;
    // The proxyhost may have a port number, that has no place in a cookie
    const std::string barehost = boost::regex_replace( proxyhost,
                      boost::regex (":[0-9]+$", boost::regex::icase ),
                                    "" );
    const std::string domainformat = "$1=" + barehost;

    for ( Z_HTTP_Header *hdr = hres->headers; hdr; hdr = hdr->next )
    {
        if ( yaz_strcasecmp(hdr->name,"Set-Cookie") != 0 )
            continue;  // not interested in other headers

        // name=value; path=/foo; domain=.some.com; expires=... ; rest=...
        std::string cookie(hdr->value);
        boost::smatch found;
        h.cookiedb("Set-Cookie line '" + cookie + "'" );

        // Original domain, from the line, or from the target
        std::string domain;
        bool wildcard = false;  // do we need an extra, wider cookie?
        if ( boost::regex_search(cookie, found, finddomain) )
        {
            domain = std::string( found[1].first, found[1].second );
            if ( domain[0] == '.' )  // .indexdata.com
            {
                domain.erase(0,1);  // make for indexdata.com
                wildcard = true;
            }
        }
        else
        {
            domain = h.targethost;
        }

        // Original path, from the line, or from the target
        std::string path;
        if ( boost::regex_search(cookie, found, findpath) )
        {
            path = std::string( found[1].first, found[1].second );
        }
        else
        { // use the target path, without parameters or fragment
            path = "/" + h.targetpath;
            h.db2(" initial default path '" + path + "'" );
            size_t cut = path.find_first_of("?#");
            if ( cut != std::string::npos )
                path.erase( cut );
            h.db2(" final default path '" + path + "'" );
            // Put a placeholder on the line, so that the path
            // replacement below has something to replace
            cookie += "; path=/XXX";
        }
        h.db2(" origdomain='" + domain + "' origpath='" + path + "'" );

        // Replace the domain, if found. If not, it's all right, default
        // is the current domain, which will be seen as the proxyhost.
        cookie = boost::regex_replace( cookie, setdomain, domainformat );
        // And the path. The original path starts with a slash already
        cookie = boost::regex_replace( cookie, setpath,
                        "$1=" + sessionroot + "/" + domain + path );
        h.cookiedb("     final line '" + cookie + "'" );
        hdr->value = odr_strdup( h.odr, cookie.c_str() );

        if ( !wildcard )
            continue;

        // Create a second cookie line. We can not do proper wildcarding
        // so we make one that is valid for the whole session.
        std::string extra = boost::regex_replace( cookie,
                                    setdomain, domainformat );
        extra = boost::regex_replace( extra, setpath,
                                    "$1=" + sessionroot + "/" );
        h.cookiedb("     extra line '" + extra + "'" );
        // Link it after the current header line
        Z_HTTP_Header *added = (Z_HTTP_Header *) odr_malloc(h.odr, sizeof(*added));
        added->name = odr_strdup(h.odr, "Set-Cookie");
        added->value = odr_strdup(h.odr, extra.c_str() );
        added->next = hdr->next;
        hdr->next = added;
        hdr = added; // step over the new one, so it does not get processed
    }
} // response_cookies

// Replace stuff in the content, if needed
//
// Applies every rule in h.customreplaces to the response body. A rule has a
// regex 'pattern', a 'replacement', an 'options' string (an 'i' anywhere in
// it makes the match case-insensitive) and an optional 'content_type' regex.
// A rule with a non-empty content_type is skipped unless that regex matches
// the response's Content-Type header.
//
// The response is left completely untouched unless at least one rule
// actually matched. When it did, the new body is allocated from h.odr and
// content_len is set to its real byte count, so bodies with embedded NUL
// bytes are not truncated.
void yf::CProxy::Rep::postprocess_content(yf::CProxy::Handle &h,
                                      Z_HTTP_Response *hres )
{
    const char *ctype = z_HTTP_header_lookup(hres->headers, "Content-Type");
    if ( !ctype || h.customreplaces.empty() )
    { // if we didn't find the content-type, it won't match. Most likely
      // we won't even have any content to work with, as in a redirect response
        return;
    }

    // Copy by explicit length; never hand a NULL pointer to std::string
    std::string content;
    if ( hres->content_buf && hres->content_len > 0 )
        content.assign(hres->content_buf, hres->content_len);

    bool replaced = false;
    std::vector<yf::CProxy::Handle::customreplace>::const_iterator ri;
    for (ri = h.customreplaces.begin(); ri != h.customreplaces.end(); ++ri)
    {
        if ( !ri->content_type.empty() )
        {
            boost::regex ctyperegex( ri->content_type );
            if ( !boost::regex_search( ctype, ctyperegex ) )
            {
                h.db2("Custom pattern '" + ri->pattern + "' skipped because " +
                  "content type '" + ctype + "' does not match " +
                  "'" + ri->content_type + "'" );
                continue;
            }
        }
        boost::regex::flag_type b_mode = boost::regex::perl;
        if (ri->options.find_first_of('i') != std::string::npos)
            b_mode |= boost::regex::icase;
        boost::regex re(ri->pattern, b_mode);
        if ( boost::regex_search(content, re) )
        { // TODO - it is tad wasteful to search first, and then make a
          // replace. The search is what tells us whether anything changed.
            content = boost::regex_replace( content, re, ri->replacement);
            replaced = true;
        }
    } // for each customreplace

    if ( !replaced )
        return; // Do not touch content buf/len unless we changed something

    char buf[64];
    snprintf(buf, sizeof(buf), "%d to %lu", hres->content_len,
             (unsigned long) content.size() );
    h.db2("After custom replaces, the content length went from " +
      std::string(buf) );

    // strlen/strdup are no good for binary content: copy exactly
    // content.size() bytes. One extra byte keeps the buffer NUL-terminated
    // for any code that treats it as a C string.
    char *newbuf = (char *) odr_malloc(h.odr, content.size() + 1);
    memcpy(newbuf, content.data(), content.size());
    newbuf[content.size()] = '\0';
    hres->content_buf = newbuf;
    hres->content_len = (int) content.size();
} // postprocess_content


//////////////////
// Process itself
//
// Handles one package. Anything that is not an HTTP request is moved down
// the filter chain untouched. For an HTTP request the steps are:
//  1. parse the URL into the handle; if that gave no session, try the
//     referer trick. When that returns true we return right away
//     (presumably it has set up a redirect response - confirm in
//     referer_trick).
//  2. read the session file, creating the session first if it is missing
//  3. build request headers and cookies, then move the package along
//     (or, with debug_nomove, answer directly with the debug buffer)
//  4. on the way back: rewrite response cookies, add our own cookie and
//     apply custom content replacements
//  5. with debugging on, append the content to the debug buffer and return
//     that as the body
// Every return on the HTTP path closes the dump file first.
void yf::CProxy::process(mp::Package &package) const
{
    Z_GDU *gdu_req = package.request().get();
    if (gdu_req && gdu_req->which == Z_GDU_HTTP_Request)
    {
        Z_HTTP_Request *hreq = gdu_req->u.HTTP_Request;

        assert(hreq);

        yf::CProxy::Handle h( m_p->debug );
        h.cookiedb("====");
        h.cookiedb( std::string(hreq->method) + " " + std::string(hreq->path) );

        m_p->parse_url(h, hreq );

        if ( h.session.empty() )
        {
            if ( m_p->referer_trick( h, hreq, package ) )
            {
                if ( h.debug & debug_verbose )
                {
                    Z_GDU *gdu_resp = package.response().get();
                    m_p->dump_gdu( gdu_resp, h,
                        "Response headers before redirect" );
                }
                h.savecookietrace();
                h.closedumpfile();
                return;
            }
        }
        if ( h.session.empty() ) // even after referer-trick
        {
            m_p->error_page( package, hreq, h,
                            400, "No cproxy session found in the URL");
            h.closedumpfile();
            return;
        }

        if ( ! m_p->read_cf_session(h) )
        {
            if ( m_p->create_session(h) )
            {
                h.cookiedb("Created a session all right, it seems");
                if ( ! m_p->read_cf_session(h) )
                {
                    m_p->error_page( package, hreq, h,
                                    400, "Could not create session");
                    h.closedumpfile();
                    return;
                }
            }
            else
            {
                m_p->error_page( package, hreq, h,
                                400, "No session");
                h.closedumpfile();
                return;
            }
        }

        // Make the headers for the rewrite filter
        m_p->make_request_headers(h, hreq);
        m_p->request_cookies(h, hreq);

        m_p->dump_gdu( gdu_req, h, "Request headers:" );
        if ( h.debug & debug_cookie )
        {
            m_p->check_cookietrace(h);
        }


        if ( h.debug & debug_nomove )
        {
            h.db("debug stop required, not moving the packet, just dumping debug");
            Z_GDU *gdu_res = h.odr.create_HTTP_Response(
                                package.session(), hreq, 200);
            Z_HTTP_Response *hres = gdu_res->u.HTTP_Response;
            z_HTTP_header_set(h.odr, &hres->headers,
                                "Content-Type", "text/plain");
            hres->content_buf = odr_strdup(h.odr, h.debugbuf.c_str());
            hres->content_len = strlen(hres->content_buf);
            package.response() = gdu_res;
            package.request() = gdu_req; // We have messed with the request
            h.savecookietrace();
            h.closedumpfile();
            return;
        }
        else
        {
            h.db2("Done preprocessing. Moving the packet along");
            package.request() = gdu_req;
            package.move();
            h.db2("Packet moved all right. Postprocessing...");
        }
        Z_GDU *gdu_res = package.response().get();
        if ( ! gdu_res ) {
            h.db("No response in package!");
        }
        else if ( gdu_res->which != Z_GDU_HTTP_Response
                  || ! gdu_res->u.HTTP_Response )
        {
            // Check the union tag before touching u.HTTP_Response; the
            // response is not guaranteed to be an HTTP one.
            h.db("");
            h.db("No http response to work with");
        }
        else
        {
            Z_HTTP_Response *hres = gdu_res->u.HTTP_Response;

            h.cookiebuf += "= HTTP response " + itoa(hres->code) + "\n";
            const char *location = z_HTTP_header_lookup(hres->headers, "Location");
            if (location)
                h.cookiebuf += "Location: " + std::string(location) + "\n";

            m_p->response_cookies(h,hres);
            m_p->add_resp_cookie(h, hres);
            m_p->postprocess_content(h, hres);

            if ( !h.debug )
            {
                // no debug, let the output pass through unmodified
                // but log it in the dump file anyway
                m_p->dump_gdu( gdu_res, h, "Response" );
            }
            else
            {
                // Dump debug log info and the content
                if ( h.debug & debug_verbose ) // bit test, not logical and
                    m_p->dump_gdu( gdu_res, h,
                        "Response headers" );
                if ( ! (h.debug & debug_keepcontent) )
                { // Force to text/plain by default, easier to read on a browser
                    z_HTTP_header_set(h.odr, &hres->headers,
                                    "Content-Type", "text/plain");
                    if ( hres->code >= 300 && hres->code < 310 ) // redirect
                        hres->code = 200; // Force it to be OK,
                      // so the browser
                      // will not try to follow a redirect, and hide the
                      // debug output
                }
                // Append the content to the debug buffer. A response without
                // a body may have a NULL buffer, so guard against that.
                if ( hres->content_buf && hres->content_len > 0 )
                    h.debugbuf.append(hres->content_buf, hres->content_len);
                // Note: odr_strdup/strlen stop at the first NUL byte, so
                // binary content gets cut short in the debug output.
                hres->content_buf = odr_strdup(h.odr, h.debugbuf.c_str());
                hres->content_len = strlen(hres->content_buf);
                char dbgbuf[1024];
                snprintf( dbgbuf, sizeof(dbgbuf), "len=%d buf=%p",
                          hres->content_len, (void *) hres->content_buf );
                h.db2("Checking content: " + std::string(dbgbuf) );
            }
        }
        package.response() = gdu_res;  // magically copies into its
                        // own odr, so h.odr may safely go out of scope
        h.savecookietrace();
        h.db2("All done");
        h.closedumpfile();
    }
    else
        package.move();
} // process


////////////////
// Configuration

// Read the CF config file. This is the file the CF engine uses to set up the
// proxified URLs. Rather than repeating the stuff in our own config, we just
// point to the CF file.
//
// Lines have the form "tag: value", and '#' starts a comment. Lines that do
// not parse into a tag and a value are skipped silently. Recognized tags:
//   proxyhostname  - split into proxyhost and proxyprefix at the first slash,
//                    and also kept whole in proxyhostname
//   sessiondir     - stored in sessiondir
//   cfengine       - stored in cfengine
// Throws mp::filter::FilterException if the file can not be opened, if a tag
// is unknown, or if proxyhostname is malformed or contains an all-digit path
// segment.
void yf::CProxy::Rep::read_cf_config(void)
{
    std::ifstream cfconf(cfconfig.c_str());
    if ( ! cfconf.is_open() )
    {
        throw mp::filter::FilterException
            ("Error opening CF config file " + cfconfig );
    }
    std::string line;
    // Loop on the getline result itself: this ends on eof and on any read
    // error, where testing eof() alone could spin forever on a failed stream.
    while ( std::getline(cfconf, line) )
    {
        //yaz_log(YLOG_LOG,"config line '%s'", line.c_str() );
        char tag[256];
        char val[1024];  // must be enough for a config line
        // The field widths keep over-long lines from overflowing the buffers
        int n = sscanf(line.c_str(), " %255[^ #:] : %1023[^#]", tag, val);
        // TODO - This could be a tad more strict parsing, will ignore
        // some lines silently
        if ( n != 2 )
            continue;

        // Drop trailing whitespace (blanks before an inline comment, or a CR
        // from a DOS-style line end). The value starts with a non-blank, so
        // this can never empty it.
        size_t vlen = strlen(val);
        while ( vlen > 0 && ( val[vlen-1] == ' ' || val[vlen-1] == '\t' ||
                              val[vlen-1] == '\r' || val[vlen-1] == '\n' ) )
        {
            val[--vlen] = '\0';
        }

        if ( debug)
        {
            yaz_log(YLOG_LOG,"[cproxy] config tag '%s' value '%s'", tag, val );
        }
        if ( strcmp(tag,"proxyhostname") == 0 )
        {
            // For historical reasons, the 'proxyhostname' is in form
            //   pxy.indexdata.com/XXX/node102
            // but we need to access the hostname (pxy.indexdata.com)
            // and the prefix (XXX/node102)
            boost::regex re( "^([^/]+)/?(.*)$"  );
              // note that the slash and prefix are both optional
            boost::cmatch matches;
            if (boost::regex_match(val, matches, re) )
            {
                proxyhost =
                  std::string ( matches[1].first, matches[1].second );
                proxyprefix =
                  std::string ( matches[2].first, matches[2].second );
                if ( debug)
                {
                      yaz_log(YLOG_LOG,"[cproxy] got proxy host '%s' and prefix '%s'",
                        proxyhost.c_str(), proxyprefix.c_str() );
                }

            } else {
                yaz_log(YLOG_WARN,"[cproxy] proxyhost '%s' can not be split "
                    "into host and prefix", val );
                // Report the offending value, not the tag name
                throw mp::filter::FilterException
                    ("Bad value '"
                    + std::string((const char *) val)
                    + "' for proxyhostname in cf-config file "
                    + cfconfig);
                // TODO - Can this ever happen, with our permissive regex?
            }
            // Keep the whole thing too
            proxyhostname = std::string(val);
            // Validate we don't have a bad prefix
            // The prefix may not contain a segment that is digits only
            boost::regex re_check( "/\\d+/"  );
            if ( boost::regex_search(proxyhostname+"/",re_check) )
            {
                yaz_log(YLOG_WARN,"[cproxy] "
                    "Bad proxy prefix in proxyhostname '%s' "
                    "in cf config file %s. ",
                    val, cfconfig.c_str() );
                throw mp::filter::FilterException
                    ("Invalid value '"
                    + std::string((const char *) val)
                    + "' for proxyhostname in cf-config file "
                    + cfconfig);
            }
        }
        else if ( strcmp(tag,"sessiondir") == 0 )
        {
            sessiondir = std::string(val);
        }
        else if ( strcmp(tag,"cfengine") == 0 )
        {
            cfengine = std::string(val);
        }
        else {
            throw mp::filter::FilterException
                ("Unknown element '"
                + std::string((const char *) tag)
                + "' in cproxy config file "
                + cfconfig);
        }
    }
    cfconf.close();
} // read_cf_config


// Process the configuration from the given xmlNode
void mp::filter::CProxy::configure(const xmlNode * ptr,
                                   bool test_only,
                                   const char *path)
{
    for (ptr = ptr->children; ptr; ptr = ptr->next)
    {
        if (ptr->type != XML_ELEMENT_NODE)
            continue;
        else if (!strcmp((const char *) ptr->name, "debug"))
        {
            m_p->debug = mp::xml::get_int(ptr, 0);
        }
        else if (!strcmp((const char *) ptr->name, "cfconfig"))
        {
            m_p->cfconfig = mp::xml::get_text(ptr);
        }
        else if (!strcmp((const char *) ptr->name, "sessionmaxage"))
        {
            m_p->sessionmaxage = mp::xml::get_int(ptr,0);
        }
        else if (!strcmp((const char *) ptr->name, "disableposttrick"))
        {
            m_p->disableposttrick.push_back( mp::xml::get_text(ptr) );
        }
        else
        {
            throw mp::filter::FilterException
                ("Bad element '"
                 + std::string((const char *) ptr->name)
                 + "' in cproxy filter");
        }
    }
    if ( m_p->cfconfig.empty() ) {
        m_p->cfconfig = cfconfigdefault;
        yaz_log(YLOG_WARN,"[cproxy] No cfconfig set in config, using default %s",
                cfconfigdefault);
    }
    m_p->read_cf_config();
    m_p->cleansessionfiles(m_p->debug);
    yaz_log(YLOG_LOG,"[cproxy] filter_cproxy configured all right");
    // TODO - Validate that all necessary configs are set
}

/////////////////////////
// Housekeeping

// Delete a dump directory together with the entries directly inside it.
// There is no recursion: a nested, non-empty subdirectory survives, and then
// so does the dump directory itself.
// Returns 0 when the directory could not be opened, 1 otherwise. Failures of
// the individual removals are deliberately not reported.
int remove_dump_dir( std::string dumpdir )
{
    DIR *handle = opendir( dumpdir.c_str() );
    if ( !handle )
        return 0; // nothing we can open, nothing to do

    for ( struct dirent *item = readdir(handle);
          item != NULL;
          item = readdir(handle) )
    {
        const char *fname = item->d_name;
        const bool self_or_parent =
            strcmp(fname, ".") == 0 || strcmp(fname, "..") == 0;
        if ( !self_or_parent )
        {
            const std::string victim = dumpdir + "/" + std::string(fname);
            remove( victim.c_str() ); // best effort, result not checked
        }
    }
    closedir( handle );
    rmdir( dumpdir.c_str() ); // fails quietly if something was left behind
    return 1;
} // remove_dump_dir

// Remove old session files, etc.
void yf::CProxy::Rep::cleansessionfiles( int debug )
{
    yf::CProxy::Handle h( debug );  // for debug logs
    struct dirent *ent;
    DIR *dir= opendir ( sessiondir.c_str() );
    if ( dir == NULL ) {
        yaz_log(YLOG_WARN,"Could not open session dir '%s'",
            sessiondir.c_str() );
        throw mp::filter::FilterException
                ("Could not read sessiondir '" + sessiondir + "' for cleanup" );
    }
    std::vector<std::string> sessions; // lists the raw session names to be
      // deleted. We need to build a list, because we may meet the session file,
      // its .p file, and dump directory in an arbitrary order, and should check
      // dates for all of them before deleting any.
    
    if (dir != NULL) {

        while ((ent = readdir (dir)) != NULL) {
            if ( strncmp(ent->d_name, "cf.", 3) != 0 ) {
                continue;  // quick check to skip most files that are not ours
            }
            // clean the name to cf.9999, dropping any suffixes like .p or .dump
            boost::regex re( "^(cf\\.[0-9]+).*" );
            boost::cmatch matches;
            int maxage = sessionmaxage * 60;  // convert from minutes
            if ( boost::regex_match( ent->d_name, matches, re) )
            {
                std::string name ( matches[1].first, matches[1].second );
                name = sessiondir + "/" + name;
                int age = file_age( h, name, maxage );
                int age_p = file_age( h, name + ".p", maxage );
                int age_d = file_age( h, name + ".dump", maxage);
                if ( age >= maxage && age_p >= maxage && age_d >= maxage)
                {
                    sessions.push_back(name);
                }
            } // cf.
        } // while
        closedir (dir);
        
        // Now we have a list of session file names (without suffixes).
        // Delete them all. Ignore most errors. Note that we may have the same
        // base name up to three times, and after the first delete, it will be
        // all gone.
        // TODO We ought to check for errors anyway, and log them, at least
        // if they are not file_not_found.
        std::vector<std::string>::const_iterator sn;
        int removed = 0;
        for ( sn = sessions.begin(); sn != sessions.end(); sn++ )
        {
            if ( remove( sn->c_str() ) == 0 )
                removed ++;
            if ( remove( (*sn+".p").c_str() ) == 0 )
                removed ++;
            removed += remove_dump_dir( *sn + ".dump" );
        }
        if ( removed )
            yaz_log(YLOG_LOG,"[cproxy] Cleaned up %d old (session) files", removed);
    }
}


///////////////////////////////////
// Boilerplate code for MP filters

// Factory referenced by the filter descriptor below: allocates a new CProxy
// and hands it back through its Base pointer.
static mp::filter::Base* filter_creator()
{
    mp::filter::Base *instance = new mp::filter::CProxy;
    return instance;
}

// Filter descriptor, exported with C linkage so the symbol name is not
// mangled. NOTE(review): presumably metaproxy looks this symbol up by name
// when it loads the filter - confirm against the metaproxy loader.
extern "C" {
    struct metaproxy_1_filter_struct metaproxy_1_filter_cproxy = {
        // first field: looks like a version number - TODO confirm
        0,
        // the filter's name
        "cproxy",
        // factory function that creates the filter object
        filter_creator
    };
}

/*
 * Additional comments about the code:
 
 ====
 Cookie processing:

 Cookies have name=value, maybe a path, and optionally domain, which can
 be absolute (foo.com), or wildcard (.foo.com). That makes six
 combinations. We need all cookies to point to the domain of the proxy, and
 to have paths that are as precise as possible.
 1) name=value
    n=v; domain=proxydomain; path=prefix/session/current-host/current-path
 2) name=value; path=/foo
    n=v; domain=proxydomain; path=prefix/session/current-host/foo
 3) name=value; domain=some.com
    n=v; domain=proxydomain; path=prefix/session/some.com
 4) name=value; domain=.some.com
 THIS CAN NOT BE DONE 100%
 We would need a wildcard domain in the middle of our path! Best approach
 is to make two cookies, one for the whole some.com, and one for the whole
 current session. This can still fail if the site sets two-level cookies,
 like .static.site.com for images.static.site.com, and cookies with the 
 same name for dynamic.site.com, but this is less likely.
 
    n=v; domain=proxydomain; path=prefix/session/some.com
    n=v; domain=proxydomain; path=prefix/session/
 5) name=value; path=/foo; domain=some.com
    n=v; domain=proxydomain; path=prefix/session/hostname/foo
 6) name=value; path=/foo; domain=.some.com
 THIS CAN NOT BE DONE 100%
 Same considerations as for 4. This seems a tad less likely situation.
 On the other hand, we necessarily lose the path for the global wildcard
 cookie, which may cause problems.
    n=v; domain=proxydomain; path=prefix/session/some.com/foo
    n=v; domain=proxydomain; path=prefix/session/


 So, to simplify:
  * name=value stays always the same  (later we could check if the value
    contains a domain, but I haven't come across such yet)
  * domain is always our proxy domain. Keep the uppercasing of the 'domain'!
    If there is no domain clause in the header, we don't need to add one, it
    should default to the current one, which is the proxydomain. (We might
    even get away omitting it always!)
  * All the magic happens in the path
    * Always start with the prefix and session
    * If we have a domain, it comes next. Otherwise the current domain
    *  If we have a wildcard domain, make a second cookie header
    * If we have a path, it comes next. (but not on the second cookie line)
 Other things to consider:
  * Keep the rest of the cookie line, for expiry times etc.

 See also http://tools.ietf.org/html/rfc6265, especially section 5.3
 "Storage Model". It explains what a client should do in various situations.
 We can hope most browsers will do something similar.

 Also note that we do not need to be perfect in filtering cookies, presumably
 the browser has done that already. We just need to maximize the chances that
 we set cookies in a way that the browser can return them to us. Better to
 have an extra cookie, than to miss one.
 


 */



/*
 * Local variables:
 * c-basic-offset: 4
 * c-file-style: "Stroustrup"
 * indent-tabs-mode: nil
 * End:
 * vim: shiftwidth=4 tabstop=8 expandtab
 */

