/* text.c - TEXT state machine.
   Copyright (C) 2000 Free Software Foundation, Inc.

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License
   as published by the Free Software Foundation; either version 2
   of the License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
   02111-1307, USA.  */

/* Written by Marc Tardif <intmktg@cam.org>.  */

/* The state machine in this program can be summarised with the
   following components:

   - A list of events that are handled by the machine.  These are
     actually the characters read from the input which can take
     values from 0 to UCHAR_MAX.  The NUL character is special
     because it is also used as a sentinel to mark the buffer end.

   - A list of actions that describe how the machine reacts to an
     event.  The return value is used in a multi-way decision tree
     to determine the statements to run on a particular action.

   - A set of states for each possible condition of the machine.
     Each state corresponds to a table containing as many elements
     as events.  For each event in a table, a corresponding action
     is defined.

   The list of events can be separated in subsets which constantly
   share the same actions.  For example, numeric characters always
   return the same action for a particular state.  Therefore, a
   GENERIC table is defined for the set of possible events and
   returns a subset.  This smaller set can then be used to define
   smaller state tables.
*/

#include <stdlib.h>

#include "ac.h"
#include "kw.h"
#include "options.h"

#include "tab.h"       /* GENERIC state table.  */
#include "text.h"      /* TEXT state table.  */

/* Actions stored in the TEXT state tables.  The enumerators are
   numbered sequentially from zero, in the order listed here.  The
   prefix of a transition names the table that emits it: PL = pline,
   PS = pspace, PW = pword, S = space, W = word.  */
enum
{
  /* No operation: the current state is kept.  */
  NOP = 0,

  /* Pattern: stay in the pword state.  */
  PAT,

  /* Transitions leaving the pattern-line (pline) state.  */
  PL2P,         /* to pattern    */
  PL2D,         /* to dash       */
  PL2S,         /* to space      */

  /* Transitions leaving the pattern-space (pspace) state.  */
  PS2P,         /* to pattern    */
  PS2D,         /* to dash       */
  PS2F,         /* to form feed  */
  PS2L,         /* to line       */

  /* Transitions leaving the pattern (pword) state.  */
  PW2D,         /* to dash       */
  PW2F,         /* to form feed  */
  PW2L,         /* to line       */
  PW2S,         /* to space      */

  /* Transitions leaving a space state.  */
  S2N,          /* to NUL byte   */
  S2P,          /* to pattern    */
  S2W,          /* to word       */

  /* Transitions leaving a word state.  */
  W2N,          /* to NUL byte   */
  W2P,          /* to pattern    */
  W2S,          /* to space      */

  /* Word: stay in the word state.  */
  WRD
};

/* State tables.  Each table holds one action per character class and
   is indexed with the class obtained from the GENERIC table, as in
   `table[tab[c]]'.  Every table has 25 entries, laid out in the
   column order shown below.

   NOTE(review): the p-prefixed columns look like the variants of a
   class that can begin a pattern, since the `space' and `word'
   tables map them to S2P and W2P -- confirm against tab.h.  */

/*    NU,   SP,  pSP,   NL,  pNL,   NP,  pNP,   DQ,  pDQ,
            AM,  pAM,   SQ,  pSQ,   DA,  pDA,   DI,  pDI,
            LT,  pLT,   GT,  pGT,   LE,  pLE,   UN,  pUN */

/* Pattern-space state: emits the PS2* transitions, NOP on SP/pSP
   and S2N on NUL.  */
static unsigned char pspace[] =
  {  S2N,  NOP,  NOP, PS2L, PS2L, PS2F, PS2F, PS2P, PS2P,
          PS2P, PS2P, PS2P, PS2P, PS2D, PS2D, PS2P, PS2P,
          PS2P, PS2P, PS2P, PS2P, PS2P, PS2P, PS2P, PS2P };
/* Space state: NOP on the SP, NL and NP columns, S2N on NUL, and
   S2W or S2P on every other column.  */
static unsigned char space[] =
  {  S2N,  NOP,  NOP,  NOP,  NOP,  NOP,  NOP,  S2W,  S2P,
           S2W,  S2P,  S2W,  S2P,  S2W,  S2P,  S2W,  S2P,
           S2W,  S2P,  S2W,  S2P,  S2W,  S2P,  S2W,  S2P };
/* Pattern-word state: PAT keeps the state, the PW2* transitions
   leave it and NUL yields W2N.  */
static unsigned char pword[] =
  {  W2N, PW2S, PW2S, PW2L, PW2L, PW2F, PW2F,  PAT,  PAT,
           PAT,  PAT,  PAT,  PAT, PW2D, PW2D,  PAT,  PAT,
           PAT,  PAT,  PAT,  PAT,  PAT,  PAT,  PAT,  PAT };
/* Word state: WRD keeps the state, W2S is emitted on the SP, NL and
   NP columns, W2P on the p-prefixed columns and W2N on NUL.  */
static unsigned char word[] =
  {  W2N,  W2S,  W2S,  W2S,  W2S,  W2S,  W2S,  WRD,  W2P,
           WRD,  W2P,  WRD,  W2P,  WRD,  W2P,  WRD,  W2P,
           WRD,  W2P,  WRD,  W2P,  WRD,  W2P,  WRD,  W2P };
/* Pattern-line state: emits the PL2* transitions, NOP on SP/pSP
   and S2N on NUL.  */
static unsigned char pline[] =
  {  S2N,  NOP,  NOP, PL2S, PL2S, PL2S, PL2S, PL2P, PL2P,
          PL2P, PL2P, PL2P, PL2P, PL2D, PL2D, PL2P, PL2P,
          PL2P, PL2P, PL2P, PL2P, PL2P, PL2P, PL2P, PL2P };

/* Exported pointer to the `word' state table.
   NOTE(review): presumably the state in which the TEXT machine is
   started -- confirm against the users of text_first.  */
unsigned char *text_first = word;

/* Run the TEXT state machine over the input that starts at *POS.

   The machine resumes from the state table stored in
   *KW->PATTERN_TABLE and from the word count stored in KW->WORDS.
   Each byte is mapped to a character class through `tab' and the
   class selects an action in the current state table.

   LIM is used to tell a NUL byte inside the data from the NUL
   sentinel: a NUL found at or after LIM stops the scan.

   On exit the current state and word count are written back to KW,
   both *BUF and *POS are set to the position where scanning stopped,
   and RET - 1 is returned.  When the sentinel is reached RET is 0,
   so the result is -1.

   NOTE(review): find_first, find_next, find_space and find_maybe are
   not defined in this file and are invoked without arguments.  They
   are presumably macros that operate on the locals of this function
   and may set RET and jump to `fin' -- confirm in ac.h before
   renaming anything here.  */
int
text_find (char **pos, char **buf, char *lim)
{
  register unsigned char c;
  register char *end;
  unsigned char *table;
  int ret, words;

  /* Pick up the state and the word count held in KW.  */
  table = *kw->pattern_table;
  words = kw->words;
  end = *pos;

  for (;;)
    {
      /* END always points one past the byte being examined.  */
      c = *end++;
      switch (table[tab[c]])
        {
        case S2N:
        case W2N:
          /* END was already advanced, so `end > lim' means the NUL
             was read at or beyond LIM: it is the sentinel.  Step
             back onto it and leave with RET = 0.  */
          if (end > lim)
            {
              end--;
              ret = 0;
              goto fin;
            }
          /* Fall through: a NUL before LIM is ordinary data and is
             ignored.  */
        case WRD:
        case NOP:
          break;

        /* Every transition that ends up in the plain `space' state.  */
        case PL2S:
        case PS2F:
        case PW2F:
        case W2S:
          table = space;
          break;

        /* A new word begins after white space.  */
        case S2W:
          words++;
          table = word;
          break;

        /* Entering the pword state from a space or word state; only
           the space case starts a new word.  */
        case S2P:
          words++;
          /* Fall through.  */
        case W2P:
          table = pword;
          find_first ();
          break;

        /* Entering pword from pspace or pline counts as a new word,
           then shares the find_next step with PAT.  */
        case PS2D: case PL2D:
        case PS2P: case PL2P:
          words++;
          table = pword;
          /* Fall through.  */
        case PAT:
          find_next ();
          break;

        case PW2S:
          table = pspace;
          find_space ();
          break;
        case PW2L:
          table = pline;
          find_space ();
          break;

        case PW2D:
          table = pspace;
          find_maybe ();
          break;

        case PS2L:
          table = pline;
          break;

        /* An action that no table handled here can produce.  */
        default:
          abort ();
        }
    }

 fin:
  /* Save the machine state so that a later call can resume from it.  */
  kw->words = words;
  *kw->pattern_table = table;
  *buf = *pos = end;

  return ret - 1;
}

/* Copy text read forwards from POS into KW->STR, starting at offset
   KW->FPOS and stopping before offset
   MAX (opt.context/2, KW->BPOS - 1).

   The scan resumes from the state table saved in *KW->CONTEXT_TABLE,
   or from `pword' when none is saved.  Characters of a word are
   copied unchanged, the end of a word is stored as a single blank
   and runs of white space are skipped.

   LIM tells a NUL byte inside the data from the NUL sentinel: a NUL
   found before LIM ends the collection, while a NUL at or after LIM
   leaves the current state in place.

   The resulting state is stored in *KW->CONTEXT_TABLE and returned;
   it is NULL once the collection is finished.  KW->FPOS is updated
   to the new write offset.  */
unsigned char *
text_forward (char *pos, char *lim)
{
  unsigned char *state = *kw->context_table;
  char *stop = kw->str + MAX (opt.context/2, kw->bpos - 1);
  char *out = kw->str + kw->fpos;
  char *in = pos;

  if (state == NULL)
    state = pword;

  /* Inside the switch below, `continue' asks for the next input byte
     whereas `break' leaves the switch and, through the `break' that
     follows it, the loop as well.  */
  for (;;)
    {
      unsigned char ch;

      if (out >= stop)
        {
          /* No room left.  Outside of a word one slot is given back,
             presumably the blank stored when the last word ended.  */
          if (state != pword)
            out--;
          state = NULL;
          break;
        }

      /* IN always points one past the byte being examined.  */
      ch = *in++;
      switch (state[tab[ch]])
        {
        case NOP:
          continue;

        case PAT:
        case PW2D:
        case WRD:
          *out++ = ch;
          continue;

        case PS2D:
        case PL2D:
        case PS2P:
        case PL2P:
          *out++ = ch;
          state = pword;
          continue;

        case PW2S:
          *out++ = ' ';
          state = pspace;
          continue;

        case PW2L:
          *out++ = ' ';
          state = pline;
          continue;

        case PS2L:
          state = pline;
          continue;

        case S2N:
          /* A NUL before LIM is part of the data and finishes the
             collection; otherwise the state is kept as it is.  */
          if (in <= lim)
            {
              out--;
              state = NULL;
            }
          break;

        case W2N:
          if (in <= lim)
            state = NULL;
          break;

        case PL2S:
        case PS2F:
          out--;
          state = NULL;
          break;

        case PW2F:
          state = NULL;
          break;

        default:
          abort ();
        }

      break;
    }

  *kw->context_table = state;
  kw->fpos = out - kw->str;
  return state;
}

unsigned char *
text_backward (char *pos)
{
  register unsigned char c;
  register char *beg, *bpos;
  unsigned char *table;

  bpos = kw->str + opt.context;
  table = pspace;
  beg = pos;

  while (bpos > kw->str)
    {
      c = *(--beg);
      switch (table[tab[c]])
        {
        case PAT:
        case PW2D:
        case WRD:
          *bpos-- = c;
        case NOP:
          break;

        case PL2D: case PS2D:
        case PL2P: case PS2P:
          *bpos-- = c;
          table = pword;
          break;
        case PW2S:
          *bpos-- = ' ';
          pos = beg + 1;
          table = pspace;
          break;
        case PW2L:
          *bpos-- = ' ';
          pos = beg + 1;
        case PS2L:
          table = pline;
          break;

        case W2N:
          pos = beg + 1;
          goto fin;

        case PL2S:
        case PS2F:
        case S2N:
          bpos++;
        case PW2F:
          goto fin;

        default:
          abort ();
        }
    }

 fin:
  kw->bpos = bpos - kw->str + 1;
  return pos;
}

