(*
 * Lemke's method Linear Complementarity Problem solver
 *
 * 5/22/01 - checker
 *
 *)
  
open Linalg
  
(*
   Lemke's algorithm for LCPs and mixed LCPs (MLCPs):

   LCP:
   w = Mz + q
   w,z >= 0, w'z = 0

   or
   
   Iw - Mz = q
   w,z >= 0, w'z = 0

   general MLCP:

   y = Ax + Bz + b
   w = Cx + Mz + q
   w,z >= 0, w'z = 0, x free, y = 0

   or

   Ax + Bz - 0y + b = 0
   Cx + Mz - Iw + q = 0
   
   x,y are pseudo-complementary, since x'y = 0 always

   To solve the MLCP, we need to do an advanced Lemke start.  We need to
   pick an initial basic set basis that has all of the free variables
   pivoted from nonbasic to basic.  We do this by block pivoting the
   necessary variables from nonbasic to basic via the tableau method.
   
   Then we can start running Lemke's algorithm on the primary ray.

   Note: if A' (A' = A^{-1}) exists:

   Ax = -Bz - b
   x = -A'Bz - A'b
   w = -CA'Bz - CA'b + Mz + q
   w = [-CA'B + M]z + [-CA'b + q]

   Is there any way to form this and solve the reduced problem without
   actually inverting A?  Forming A' is bad.

 *)
  
(* @todo
   
   doing the mixed-lcp thing:

   + convert lcp.ml to use the linalg matrix structure

   ? make it not copy the matrix and pivot covering vector separately
   ? @todo should I bother with this, it's an optimization!?!
   ? this means have q,d,m in lcp, optionally pivot d...makes sets harder, special case

   + write block pivot
   
   + convert lemke to take info on an initial block pivot for
   the advanced start and for free variables (and 0 varibles?)
   
   find a complementary basis including all x's (and y's) and whatever
   z's/w's might be necessary to make the basis invertible, and perform an
   advanced lemke start with that basis as the basic set

   + assume free variables are in basic after block pivot?

   + should this be num_pivot num_free thing, or two different
   sets you pass in?  how to expose set thing?

   - should work for compatible systems as well?  (row and column pivots)

   - do I need to do complete pivot or just enough to get the pivot
   correct?  figure out which elements affect which other elements in
   the tableau...if we'll never pivot the x's out of basic, then why
   complete their entries?  in fact, all the x rows of the tableau
   are irrelevant after the advanced start, just the q column matters
   for the final result, right?



   - figure out what the advanced start basis should be
   - just the free variables pivoted, or need others?


   questions for munson (also look on the paper):

   - don't need to bother forming new d vector, just slam in 0,1 after
   block pivot, right?
 *)   
  
(*
   @todo
   - it's not clear if I want to put q in the tableau
   or leave it separate...I think the non-tab version wants it separate
   - uh, this is garbage...use Ax=b solvers and updating for numerical
   stability...but this is only O(n^2) inside the loop...hmm

 *)

(* @todo local module? no local open? *)
(*
   var is type of variables in solution:
   W = initial basic
   Z = initial nonbasic
   X = free
   Y = 0
 *)
type var = W of int | Z of int | X of int | Y of int
type lcp = {
    t : matrix;  (* tableau *)
    q : matrix;
    basic_set : var array;
    nonbasic_set : var array;
  } 
exception Found of int (* @todo hack, shouldn't this be built in? or local? *)
let debug_print = ref false
let debug_prec = ref (10,4)
    
let print lcp =
  let varstr3 var =
    let s,a = match var with
    | W a -> "W",a
    | Z a -> "Z",a
    | X a -> "X",a
    | Y a -> "Y",a in
    Printf.sprintf "%s%2d" s a
  in
  let nint,nfrac = !debug_prec in
  Format.printf "@[<0>";
  let h = lcp.t.height and
      w = lcp.t.width in
  (* print top row of space, 1, and nonbasic set variables *)
  for i = 0 to w + 1 do
    if i = 0 then
      Format.printf "    |"
    else if i = 1 then
      Format.printf "%*d |" nint 1
    else
      Format.printf "%*s " nint (varstr3 lcp.nonbasic_set.(i-2))
  done;
  Format.force_newline ();
  for i = 0 to w + 1 do
    if i = 0 then
      Format.printf "----+"
    else if i = 1 then
      Format.printf "%s-+" (String.make nint '-')
    else
      Format.printf "%s-" (String.make nint '-')
  done;
  Format.force_newline ();
  for i = 0 to h - 1 do
    for j = 0 to w + 1 do
    if j = 0 then
      Format.printf "%s |" (varstr3 lcp.basic_set.(i))
    else if j = 1 then
      Format.printf "%*.*f |" nint nfrac lcp.q.data.{i}
    else
      Format.printf "%*.*f " nint nfrac lcp.t.data.{i*w+j-2}
    done;
    if i < lcp.t.height - 1 then
      Format.force_newline ();
  done;
  Format.printf "@]"

    
let pivot_q lcp r s =
  let t = lcp.t.data and
      q = lcp.q.data and
      w = lcp.t.width in
  let n = lcp.t.height in
  (* invert pivot so we can multiply *)
  assert (t.{r*w+s} <> 0.0);
  let m_rs = 1.0 /. t.{r*w+s} in
  (* swap variables in sets *)
  let b = lcp.basic_set.(r) and nb = lcp.nonbasic_set.(s) in
  lcp.basic_set.(r) <- nb; lcp.nonbasic_set.(s) <- b;
  (* do q first *)
  for i = 0 to n - 1 do
    if i <> r then (* skip the pivot row so we can keep using it *)
      q.{i} <- q.{i} -. (t.{i*w+s} *. m_rs) *. q.{r}
  done;
  (* finish up pivot row in q *)
  q.{r} <- -.q.{r} *. m_rs;
  m_rs (* return the inverted pivot to be nice *)

let pivot lcp r s =
  let t = lcp.t.data and
      q = lcp.q.data and
      w = lcp.t.width in
  let n = lcp.t.height in
  (* pivot q first, it returns the inverted pivot *)
  let m_rs = pivot_q lcp r s in
  t.{r*w+s} <- m_rs;
  (* now pivot m *)
  for i = 0 to n - 1 do
    if i <> r then (* skip the pivot row again *)
      for j = 0 to n do
        if j <> s then (* skip the pivot column too *)
          t.{i*w+j} <- t.{i*w+j} -. (t.{i*w+s} *. m_rs) *. t.{r*w+j}
      done
  done;
  (* now finish up pivot column... *)
  for i = 0 to n - 1 do
    if i <> r then (* skip pivot element, handled above *)
      t.{i*w+s} <- t.{i*w+s} *. m_rs
  done;
  (* ...and row *)
  for j = 0 to n do
    if j <> s then (* skip pivot element, handled above *)
      t.{r*w+j} <- -.t.{r*w+j} *. m_rs
  done

let find_blocking_basic lcp s =
  (* find the pivot element by doing a min-ratio test, skipping free vars *)
  let t = lcp.t.data and
      q = lcp.q.data and
      w = lcp.t.width in
  let n = lcp.t.height in
  let pil = ref [] and min_ratio = ref max_float in
  for i = 0 to n - 1 do
    match lcp.basic_set.(i) with
      Y _ -> failwith "Y = 0 in basic";
    | X _ -> ()
    | _ -> 
        if t.{i*w+s} < 0.0 then
          let ratio = -.q.{i} /. t.{i*w+s} in
          if ratio < !min_ratio then
            begin
              (* flush the list, we found a new one *)
              pil := [i]; 
              min_ratio := ratio
            end
          else if ratio = !min_ratio then (* @todo probably need eps test *)
            (* keep a list of min indices *)
            pil := i :: !pil 
  done;
  if !pil = [] then
    failwith "secondary ray termination"
  else
    try
      (* always return z0 if it's blocking *)
      List.iter (fun i -> if lcp.basic_set.(i) = Z 0 then raise (Found i)) !pil;
      (* else, select a random blocking variable *)
      (* @todo this is lame and won't prevent cycling anyway *)
      (* @todo test this path? *)
      List.nth !pil (Random.int (List.length !pil))
    with Found i -> i
let _ = Random.self_init () (* @todo hack *)

let find_nonbasic_complement lcp v =
  let comp = match v with
    W a -> Z a
  | Z a -> W a
  | X a -> Y a
  | Y a -> X a
  in
  try
    Array.iteri (fun i nbv ->
      if nbv = comp then raise (Found i)) lcp.nonbasic_set;
    raise Not_found
  with Found i -> i

let block_pivot lcp basic_pivots nonbasic_pivots = 
  (*
     We block pivot by recognizing that a block pivot's matrix
     inversion can be handled by a series of single pivots, just like
     gaussian elimination does matrix inversion.  We pivot the block
     by iterating over the nonbasic variables we want to pivot, and
     then searching for non-zero pivots in the not-yet-pivoted part of
     the basic set.  This moves all of the nonbasics to basic and vice
     versa.

     @todo should this handle compatible systems?
     to do this, instead of just doing a row search on the basics, you'd also
     check for multiple basic variables going to zero at the same time,
     and pivot all of them, I think
   *)
  let t = lcp.t.data and
      q = lcp.q.data and
      w = lcp.t.width in

  (* keep track of the remaining pivots in the array *)
  let blen = ref (Array.length basic_pivots) in
  for i = 0 to Array.length nonbasic_pivots - 1 do
    let nbi = nonbasic_pivots.(i) in

    (* find the largest value for the pivot *)
    let bmax = ref (0.0,0) in   (* max, row idx *)
    let jidx = ref 0 in         (* idx of pivot entry *)
    for j = 0 to !blen - 1 do
      let bi = basic_pivots.(j) in
      let b = abs_float t.{bi*w+nbi} in
      if b > fst !bmax then begin
        bmax := (b,bi);
        jidx := j;
      end
    done;

    if !debug_print then begin
      print lcp; Format.print_newline ();
    end;
    
    pivot lcp (snd !bmax) nbi;  (* pivot the largest row *)
    
    (* we finished this pivot, so swap it out *)
    basic_pivots.(!jidx) <- basic_pivots.(!blen - 1); 
    blen := !blen - 1;
  done;
  ()    

  
let lemke_advanced_basis m' q' num_free_vars initial_block_pivot_basis_set =
  (* @todo we currently assume the free variables can be block pivoted as
     a basis and we don't need to find a new complementary basis *)
  (* @todo we also assume that the free variables are the first num_free_vars
     variables in the vector *)
  (* @todo we also assume the X free variables and the Y = 0 variables are
     1-to-1 *)
  (* @todo why do we have num_initial_block_pivot, given these assumptions?
     @todo if I had robust test cases, I could refactor it out and find out *)
  assert (num_free_vars >= 0 && num_free_vars <= (Array.length initial_block_pivot_basis_set));

  (* @todo this code is pretty much lemke, but with minor changes...refactor them! *)

  let n = m'.width in
  assert (m'.height = n && q'.height = n && q'.width = 1);
  (* allocate our z result and the m = [d m'] tableau *)
  let z = create_vector n in
  let m = create_matrix n (n+1) in
  for i = 0 to n - 1 do
    m.data.{i*m.width+0} <- 1.0;  (* initialize covering vector d to 1's *)
    for j = 1 to n do
      m.data.{i*m.width+j} <- m'.data.{i*m'.width+j-1}
    done
  done;
  let q = create_vector n in
  (* find the smallest z0 for w = q + d*z0 >= 0 and check if the free vars are solved *)
  let pi = ref n and min_q = ref max_float and
      free_solved = ref true in
  for i = 0 to n - 1 do
    q.data.{i} <- q'.data.{i};
    if i < num_free_vars then begin
      if q.data.{i} <> 0.0 then
        free_solved := false
    end else begin
      if q.data.{i} < !min_q then begin
        pi := i;
        min_q := q.data.{i}
      end
    end
  done;
  if !min_q >= 0.0 && !free_solved then begin
    (* we're done, z = 0 solves the problem *)
    fill z 0.0;
    z
  end else begin
    (* do lemke's algorithm with an advanced start *)
   
    (* set variables are 1 based:
       w/x/y's are w1-wn, z's are z0-zn, where z0 is the artificial var *)
    let basic_set = Array.init n (fun i ->
      if i < num_free_vars then
        Y (i+1)
      else
        W (i-num_free_vars+1))
    in
    let nonbasic_set = Array.init (n+1) (fun i ->
      if i = 0 then
        Z 0
      else if i <= num_free_vars then
        X i
      else
        Z (i-num_free_vars)) in

    let lcp = { t = m; q = q; basic_set = basic_set; nonbasic_set = nonbasic_set } in

    let t = lcp.t.data and
        q = lcp.q.data and
        w = lcp.t.width in

    (* advanced start *)
    if Array.length initial_block_pivot_basis_set > 0 then begin
      let np = Array.length initial_block_pivot_basis_set in

      if !debug_print then begin
        Format.printf "@\nAdvanced start block pivot:@\n";
        for i = 0 to np - 1 do
          Format.printf "%d, " initial_block_pivot_basis_set.(i);
        done;
        Format.printf "@\n";
      end;
      
      (* block pivot to form the warm start initial basis *)
      (* @optimize why am I copying this? it's destroyed *)
      block_pivot lcp (Array.copy initial_block_pivot_basis_set)
        (Array.init np (fun i -> initial_block_pivot_basis_set.(i) + 1));

      (* reinit the covering vector to 0/1 for free/positive *)
      Array.iteri (fun i el -> 
        match el with
          Y _ -> failwith "advanced start with Y = 0 variables still in basic"
        | X _ -> t.{i*w+0} <- 0.0
        | Z _ | W _ -> t.{i*w+0} <- 1.0) lcp.basic_set;
      
      (* find the new blocking variable *)
      (* @todo refactor this with above *)
      min_q := max_float;
      for i = 0 to n - 1 do
        match lcp.basic_set.(i) with
          Y _ -> failwith "advanced start with Y = 0 variables still in basic"
        | X _ -> ()
        | _ -> 
            if q.{i} < !min_q then begin
              pi := i;
              min_q := q.{i}
            end
      done;
    end;

    if !debug_print then begin
      Format.printf "@\nStarting Solve:@\n";
    end;
    
    (* check for done-ness with min_q >= 0.0 (free_solved is always
       true here due to the advanced start) so that using this as an
       Ax=b solver will work *)
    if !min_q < 0.0 then begin
      (* initial pivot of pj = z0 into the basic set, pi into nonbasic *)
      let pj = ref 0 in

      (* we loop until z0 goes back into the nonbasic set *)
      while lcp.basic_set.(!pi) <> Z 0 do

        if !debug_print then begin
          print lcp; Format.print_newline ();
        end;
        
        pivot lcp !pi !pj;

        (* the new pj's complement is the driving variable *)
        pj := find_nonbasic_complement lcp nonbasic_set.(!pj);

        (* find pj's blocking basic variable in q *)
        pi := find_blocking_basic lcp !pj;

      done;

      if !debug_print then begin
        Format.printf "@\nFinal Pivot:@\n";
        print lcp; Format.print_newline ();
      end;

      (* final pivot only q to get the answer *)
      (* @todo ignore (pivot_q lcp !pi !pj); *)
      pivot lcp !pi !pj; (* @todo only pivot q *)
    end;

    if !debug_print then begin
      Format.printf "@\nDone:@\n";
      print lcp; Format.print_newline ();
    end;

    (* form the final z vector *)
    for i = 0 to n - 1 do
      match lcp.basic_set.(i) with
        (* we're using 1 based indices in the set variables *)
        Y _ -> failwith "Y = 0 in basic"
      | X a -> z.data.{a-1} <- q.{i}
      | Z a -> z.data.{a+num_free_vars-1} <- q.{i}
      | W a -> z.data.{a+num_free_vars-1} <- 0.0
    done;
    z
  end

let lemke m' q' = lemke_advanced_basis m' q' 0 [||]
let lemke_advanced m' q' n = lemke_advanced_basis m' q' n (Array.init n (fun i -> i))

let tests = [
  create_array 4 4 [|   8.0047;     0.6033;    -7.9953;     0.6033; 
                        0.6033;    84.7953;     0.6033;    68.7953; 
                        -7.9953;     0.6033;    17.3701;    10.7519; 
                        0.6033;    68.7953;    10.7519;   168.2299; |],
  create_array 4 1 [|  0.0006;
                       0.0000;
                       -0.3844;
                       -2.7769; |];

  create_array 3 3 [|  1.;  2.;  3.;
                       4.;  5.;  6.;
                       7.;  8.; 10.; |],
  create_array 3 1 [| 1.; 4.; 6.; |];
  
  create_array 5 5 [|  0.;  0.; -1.; -5.;  1.;
                       0.;  0.; -5.;  1.;  1.;
                       1.;  5.;  0.;  0.;  0.;
                       5.; -1.;  0.;  0.;  0.;
                      -1.; -1.;  0.;  0.;  0.; |],
  create_array 5 1 [| 1.; 10.; 15.; 11.; -4.; |];

  create_array 3 3 [| 0.; -1.; 2.;
                      2.; 0.; -2.;
                      -1.; 1.; 0.; |],
  create_array 3 1 [| -3.; 6.; -1. |];

  create_array 6 6 [| 2.;-2.;-1.;1.;1.;-1.;
                      -2.;2.;1.;-1.;-1.;1.;
                      -1.;1.;2.;-2.;1.;-1.;
                      1.;-1.;-2.;2.;-1.;1.;
                      1.;-1.;1.;-1.;2.;-2.;
                      -1.;1.;-1.;1.;-2.;2.; |],
  create_array 6 1 [| 90.;-30.;-10.;30.;80.;-25.|];

  create_array 2 2 [| 1.; -1.;
                      -1.; 1.; |],
  create_array 2 1 [| 1.; -1. |]
] 
  
let test n =
  match n with
    0 -> { t = create_array 4 5 [| 0.; 0.;  0.; -1.; -5.;
                                   0.; 0.;  0.; -5.;  1.;
                                   0.; 1.;  5.;  0.;  0.;
                                   0.; 5.; -1.;  0.;  0.; |];
           q = create_array 4 1 [| 1.; 10.; 15.; 11.; |];
           basic_set = Array.init 4 (fun i -> W (i+1));
           nonbasic_set = Array.init (4+1) (fun i -> Z i); },
      ([|0;1;2;3|],[|1;2;3;4|])
  | 1 -> { t = create_array 3 4 [| 0.;  1.;  2.;  3.;
                                   0.;  4.;  5.;  6.;
                                   0.;  7.;  8.; 10.; |];
           q = create_array 3 1 [| 1.; 4.; 6.; |];
           basic_set = Array.init 3 (fun i -> W (i+1));
           nonbasic_set = Array.init (3+1) (fun i -> Z i); },
      ([|0;1;2|],[|1;2;3|])
  | 2 -> { t = create_array 6 7 [| 1.0000;     9.4261;   -10.3679;    -6.5739;   -10.3679;     0.0000;     0.0000; 
                                   1.0000;   -10.3679;    83.3739;   -10.3679;    67.3739;     0.0000;     0.0000; 
                                   1.0000;    -6.5739;   -10.3679;    25.7199;    13.4686;     0.2938;    23.8364; 
                                   1.0000;   -10.3679;    67.3739;    13.4686;   159.8801;    23.8364;    60.5062; 
                                   1.0000;     0.0000;     0.0000;     0.2938;    23.8364;    34.1913;    -1.8963; 
                                   1.0000;     0.0000;     0.0000;    23.8364;    60.5062;    -1.8963;   151.4087; |];
           q = create_array 6 1 [|    0.0366;
                                      0.0043;
                                      0.2392;
                                      -0.0558;
                                      -18.3837;
                                      14.6963;  |];
           basic_set = Array.init 6 (fun i -> Y (i+1));
           nonbasic_set = Array.init (6+1) (fun i -> if i = 0 then Z 0 else X i); },
      ([|0;1;2;3;4;5;|],[|1;2;3;4;5;6|])
  | _ -> failwith "invalid test number"