# linesearch.R -- searches a ray for a minimum
# written by Andrew Clausen <clausen@econ.upenn.edu> in 2007.

# does quadratic interpolation to solve for the minimum on [x0, x1]
#	f(x) = ax^2 + bx + c
#
# (1) f(x0) = f0	=> f0 = a x0^2 + b x0 + c
# (2) f(x1) = f1	=> f1 = a x1^2 + b x1 + c
# (3) f'(x0) = g0	=> g0 = 2a x0 + b
#
# (1)-(2)	f0 - f1 = a (x0^2 - x1^2) + b(x0 - x1)
# from (3):	b = g0 - 2a x0
# from (1)-(2):	f0 - f1 = a (x0^2 - x1^2) + (g0 - 2a x0)(x0 - x1)
#		f0 - f1 - g0(x0 - x1) = a (x0^2 - x1^2) - 2a x0(x0 - x1)
#		f0 - f1 - g0(x0 - x1) = a [(x0^2 - x1^2) - 2 x0(x0 - x1)]
#		a = [f0 - f1 - g0(x0 - x1)] / [(x0^2 - x1^2) - 2 x0(x0 - x1)] 
#
# Then, f'(x) = 2ax + b.  The minimum is at x = -2a/b
interpolate.min <- function(x0, x1, f0, f1, g0)
{
	dx <- x0 - x1
	dx2 <- x0^2 - x1^2

	a <- (f0 - f1 - g0*dx) / (dx2 - 2*x0*dx)
	b <- g0 - 2*a*x0

	x <- -2*a / b
	if (is.na(x) || (x < min(x0, x1)) || (x > max(x0, x1)))
		return ((x0 + x1) / 2)
	x
}

# Implements the More-Thuente linesearch algorithm.  The notation is taken from
# Nocedal and Wright (2006).  This function returns an approximate solution to
#
#	argmin_{alpha \in [0, alpha.max]} phi(alpha).
#
# To avoid getting stuck in local minima, the algorithm attempts to find a
# point that satisfies the Wolfe conditions:
#  - Armijo condition: the function has decreased enough.  This is controlled
# by the "ftol" parameter.
#  - curvature condition: the function has flattened out enough.  This is
# controlled by the "gtol" parameter.
# (It will succeed at this, unless machine precision gets in the way.)
#
# The basic idea of the algorithm is:
#  - do a crude search to find an interval that contains a local minimum
# that satisfies the Wolfe conditions.
#  - do a bisection-like search on that interval, until a point is found
# that satisfies the Wolfe conditions.
#
# In particular, the algorithm doesn't attempt to find a local minimizer.
# It just tries to find a rough guess, which is much faster.  The function
# that calls linesearch will do it many times.  Since the Wolfe conditions
# are relative to the starting point of "linesearch", each subsequent call
# will give tighter approximations.
#
# The algorithm will terminate if the objective eventually starts decreasing,
# or a finite constraint on alpha is specified.
#
# Parameters:
#  - phi : R -> R is the objective function to minimize.
#  - phi_ : R -> R is the derivative of phi.
#  - alpha1 is the starting point.
#  - alpha.max is an upper-bound constraint on alpha (can be Inf).
#  - ftol and gtol are convergence parameters
#  - stepsize is a parameter for the first ("crude") step of the algorithm
linesearch <- function(phi, phi_, alpha1, alpha.max,
		       ftol=0.0001, gtol=0.9, stepsize=3)
{
	# the Wolfe conditions are: (we want both to be TRUE)
	armijo <- function(alpha)
		phi(alpha) < phi(0) + ftol * alpha * phi_(0)
	curvature <- function(alpha)
		abs(phi_(alpha)) <= gtol * abs(phi_(0))

	zoom <- function(alpha.lo, alpha.hi) {
		for (i in 1:30) {
			alpha <- interpolate.min(
					alpha.lo, alpha.hi, phi(alpha.lo),
					phi(alpha.hi), phi_(alpha.lo))

			if (!armijo(alpha) || (phi(alpha) >= phi(alpha.lo))) {
				alpha.hi <- alpha
			} else {
				if (curvature(alpha))
					return(alpha)
				# started going uphill?
				if (phi_(alpha) * (alpha.hi - alpha.lo) >= 0)
					alpha.hi <- alpha.lo
				alpha.lo <- alpha
			}
		}
		0	# not enough progress; give up.
	}

	stopifnot(phi_(0) < 0)

	alpha_ <- 0
	alpha <- ifelse(alpha1 >= alpha.max, alpha.max/2, alpha1)
	for (i in 1:100) {
		if (i > 1 && (phi(alpha) >= phi(alpha_)))
			return(zoom(alpha_, alpha))
		if (!armijo(alpha))
			return(zoom(alpha_, alpha))
		if (curvature(alpha))
			return(alpha)
		if (phi_(alpha) >= 0)
			return(zoom(alpha, alpha_))
		alpha_ <- alpha
		alpha <- min((alpha + alpha.max) / 2, alpha * stepsize)
	}
}
