# Apache2 config for the cfproxy
# The engine hard-codes the address cfproxy.indexdata.com
# The DNS resolves *.cfproxy.indexdata.com here, so we get also
# the session numbers, as in
#  http://144180.cfproxy.indexdata.com/americangovernment.abc-clio.com/Search/Display/201763?terms=helsinki
# (In the old days, hpxy was used instead of cfproxy. That name should no
# longer be mentioned anywhere.)
# 
# There is also cfproxy2.indexdata.com, which does not need to
# be publicly known (although it is for now), into which all the
# requests are proxied.


##########
# THIS IS HEIKKI'S TEST SETUP FOR hpxy.indexdata.com on flurry
# 

# Front-end virtual host. Answers for hpxy.indexdata.com and every
# subdomain of it, forwards all requests to hpxy2.indexdata.com, and runs
# the returned HTML through the proxy-html filter so that links in the page
# keep pointing back at this proxy.
<VirtualHost *:80>
  ServerName hpxy.indexdata.com
  ServerAlias *.hpxy.indexdata.com
  
  DocumentRoot /home/heikki/cf/cfproxy
  CustomLog /var/log/apache2/cfproxy-access.log combined
  ErrorLog /var/log/apache2/cfproxy-error.log

  LogLevel Info
  
  # Remove Accept-Encoding from the incoming request so that compression is
  # disabled; links cannot be replaced in compressed content.
  RequestHeader unset Accept-Encoding

  # Remember the first element in the path for later rewrite.
  # This applies to old-form URLs:  9999.cfproxy.indexdata.com/google.com/foo
  # REALHOST is set for every request that has a first path element; it is
  # only used by the old-form mapping rules below (those conditioned on
  # !TARGETHOST).
  SetEnvIf Request_URI ^/?([^/]+) REALHOST=$1

  # Remember session and targethost from new-form URLs.
  # New format: http://cfproxy.indexdata.com/pre/fix/999999/targethost/targetpath...
  #   PREFIX     = optional leading path plus the numeric session  (pre/fix/999999)
  #   SESSION    = the numeric path element alone                  (999999)
  #   TARGETHOST = the path element right after the session number (targethost)
  # NOTE(review): an old-form URL that happens to contain an all-digit path
  # element followed by another element (e.g. /host/a/123/b) also matches
  # these patterns and will therefore be handled as new-form - confirm that
  # this cannot happen in practice.
  SetEnvIf Request_URI ^/((.*?/)?[0-9]+)/([^/]+) PREFIX=$1
  SetEnvIf Request_URI ^/(.*?/)?([0-9]+)/[^/]+ SESSION=$2
  SetEnvIf Request_URI ^/(.*?/)?[0-9]+/([^/]+) TARGETHOST=$2
 
   
  # Reverse proxy only; forward proxying stays off.
  ProxyRequests Off
  # NOTE(review): Order/Allow is the Apache 2.2 access-control syntax. On
  # Apache 2.4 it needs mod_access_compat, or should be replaced with
  # "Require all granted" - confirm which version this runs on.
  <Proxy *>
    Order deny,allow
    Allow from all
  </Proxy>

  # Enable advanced rules (conditions etc)
  ProxyHTMLInterp On

  # Enable extended proxying (doesn't seem to help much)
  # ProxyHTMLExtended On   

  # Hand every request to the second virtual host (hpxy2), and fix up the
  # backend's own host name in redirect headers on the way back.
  ProxyPass         / http://hpxy2.indexdata.com/
  ProxyPassReverse  / http://hpxy2.indexdata.com/
  #ProxyHTMLURLMap http:// /

  # Debug logging. Not very useful.
  # ProxyHTMLLogVerbose On

  # Run responses through the proxy-html filter, which applies the
  # ProxyHTMLURLMap rules below.
  SetOutputFilter proxy-html

  # Link-rewriting rules. The order of the rules matters: a matching rule
  # with the L flag stops further rules from being tried.
  <Location />
      # Debug to see that variables are set up right:
      #ProxyHTMLURLMap  /  /R=${REALHOST|NoEnvVar}/P=${PREFIX|NoPrefix}/S=${SESSION|NoSession}/T=${TARGETHOST|NoTargetHost}/ VL

      # New mapping (applies only when TARGETHOST is set, i.e. new-form URLs)
      # a+b) Absolute link http://foo.com. Make it relative (so the browser
      # will add the proxy host), and add prefix and session at the beginning.
      # The target host will be in the URI already.
      ProxyHTMLURLMap  http://      /${PREFIX|NoPrefix}/ V^L  TARGETHOST

      # c) Relative link. Add prefix, session and host. The browser will
      # add the proxy host.
      ProxyHTMLURLMap  /  /${PREFIX|NoPrefix}/${TARGETHOST|NoTargetHost}/ V^L TARGETHOST

      # d) Plain link "images/foo.png" - do not touch. The browser will do it all.

      # Old mapping (applies only when TARGETHOST is not set, i.e. old-form
      # URLs): prepend the first path element (REALHOST) to links starting
      # with "/", and replace a leading "http://" with "/".
      ProxyHTMLURLMap  /      /${REALHOST|NoEnvVar}/ V  !TARGETHOST
      ProxyHTMLURLMap http:// /                      ^  !TARGETHOST

      # The options are
      #   V for expanding variables
      #   L for last match, don't try more rules if this matches
      #   ^ match beginning of url
      # The environment variables should always be there; the alternatives
      # ("NoSession" etc) are mostly a debugging aid.
      # The last element is a condition that switches between new and old form.
        
  </Location>

</VirtualHost>


# cfproxy2 (named hpxy2 in this test setup), which runs our own proxying script
# Back-end virtual host. The front-end virtual host's ProxyPass sends every
# request to hpxy2.indexdata.com; here every URL is mapped onto a single
# Perl CGI script.
<VirtualHost *:80>
  ServerName hpxy2.indexdata.com
  ServerAlias hpxy2 
  DocumentRoot /home/heikki/cf/cfproxy
  CustomLog /var/log/apache2/cfproxy2-access.log combined
  ErrorLog /var/log/apache2/cfproxy2-error.log

  LogLevel Info
  
  # Redirect everything to our proxy script: any URL path (the pattern ^/
  # matches them all) is served by proxy-redirect.pl. The commented-out
  # line is the same mapping to proxy.pl instead.
  #AliasMatch ^/ /home/heikki/cf/cfproxy/proxy.pl
  AliasMatch ^/ /home/heikki/cf/cfproxy/proxy-redirect.pl
  
  # The name of the proxy host used to be passed to the script (for
  # replacing) like this:
  # SetEnv PROXYHOST cfproxy.indexdata.com
  # Not used any more, now it comes from the config file.

  # Allow CGI execution in the script directory and treat .pl files as CGI
  # scripts; .htaccess overrides are disabled.
  <Directory /home/heikki/cf/cfproxy>
    Options ExecCGI
    AllowOverride None
    AddHandler cgi-script .pl
  </Directory>
</VirtualHost>
