Wednesday, 29 November 2017

DFA in C#

using System;
using SCG = System.Collections.Generic;
using C5;

using state = System.Int32;
using input = System.Char;

namespace RegularExpressionEngine
{
  /// <summary>
  /// Implements a deterministic finite automata (DFA)
  /// </summary>
  class DFA
  {
    // Start state
    public state start;
    // Set of final states
    public Set<state> final;
    // Transition table
    public SCG.SortedList<KeyValuePair<state, input>, state> transTable;

    public DFA()
    {
      final = new Set<state>();

      transTable = new SCG.SortedList<KeyValuePair<state, input>, state>(new Comparer());
    }

    public string Simulate(string @in)
    {
      state curState = start;

      CharEnumerator i = @in.GetEnumerator();

      while(i.MoveNext())
      {
        KeyValuePair<state, input> transition = new KeyValuePair<state, input>(curState, i.Current);

        if(!transTable.ContainsKey(transition))
          return "Rejected";

        curState = transTable[transition];
      }

      if(final.Contains(curState))
        return "Accepted";
      else
        return "Rejected";
    }

    public void Show()
    {
      Console.Write("DFA start state: {0}\n", start);
      Console.Write("DFA final state(s): ");

      SCG.IEnumerator<state> iE = final.GetEnumerator();

      while(iE.MoveNext())
        Console.Write(iE.Current + " ");

      Console.Write("\n\n");

      foreach(SCG.KeyValuePair<KeyValuePair<state, input>, state> kvp in transTable)
        Console.Write("Trans[{0}, {1}] = {2}\n", kvp.Key.Key, kvp.Key.Value, kvp.Value);
    }
  }

  /// <summary>
  /// Implements a comparer that suits the transTable SordedList
  /// </summary>
  public class Comparer : SCG.IComparer<KeyValuePair<state, input>>
  {
    public int Compare(KeyValuePair<state, input> transition1, KeyValuePair<state, input> transition2)
    {
      if(transition1.Key == transition2.Key)
        return transition1.Value.CompareTo(transition2.Value);
      else
        return transition1.Key.CompareTo(transition2.Key);
    }
  }

}
As you see, a DFA has 3 variables: a start state, a set of final states and a transition table that maps transitions between states.
Below I present the SubsetMachine class that is responsible for the hard work of extracting an equivalent DFA from a given NFA:
//
//  Regular Expression Engine C# Sample Application
//  2006, by Leniel Braz de Oliveira Macaferi & Wellington Magalhães Leite.
//
//  UBM's Computer Engineering - 7th term [http://www.ubm.br/]
// 
//  This program sample was developed and turned in as a term paper for Lab. of
//  Compilers Construction. It was based on the source code provided by Eli Bendersky
//  [http://eli.thegreenplace.net/] and is provided "as is" without warranty.
//

using System;
using SCG = System.Collections.Generic;
using C5;

using state = System.Int32;
using input = System.Char;

namespace RegularExpressionEngine
{
  class SubsetMachine
  {
    private static int num = 0;

    /// <summary>
    /// Subset machine that employs the powerset construction or subset construction algorithm.
    /// It creates a DFA that recognizes the same language as the given NFA.
    /// </summary>
    public static DFA SubsetConstruct(NFA nfa)
    {
      DFA dfa = new DFA();

      // Sets of NFA states which is represented by some DFA state
      Set<Set<state>> markedStates = new Set<Set<state>>();
      Set<Set<state>> unmarkedStates = new Set<Set<state>>();

      // Gives a number to each state in the DFA
      HashDictionary<Set<state>, state> dfaStateNum = new HashDictionary<Set<state>, state>();

      Set<state> nfaInitial = new Set<state>();
      nfaInitial.Add(nfa.initial);

      // Initially, EpsilonClosure(nfa.initial) is the only state in the DFAs states and it's unmarked.
      Set<state> first = EpsilonClosure(nfa, nfaInitial);
      unmarkedStates.Add(first);

      // The initial dfa state
      state dfaInitial = GenNewState();
      dfaStateNum[first] = dfaInitial;
      dfa.start = dfaInitial;

      while(unmarkedStates.Count != 0)
      {
        // Takes out one unmarked state and posteriorly mark it.
        Set<state> aState = unmarkedStates.Choose();

        // Removes from the unmarked set.
        unmarkedStates.Remove(aState);

        // Inserts into the marked set.
        markedStates.Add(aState);

        // If this state contains the NFA's final state, add it to the DFA's set of
        // final states.
        if(aState.Contains(nfa.final))
          dfa.final.Add(dfaStateNum[aState]);

        SCG.IEnumerator<input> iE = nfa.inputs.GetEnumerator();

        // For each input symbol the nfa knows...
        while(iE.MoveNext())
        {
          // Next state
          Set<state> next = EpsilonClosure(nfa, nfa.Move(aState, iE.Current));

          // If we haven't examined this state before, add it to the unmarkedStates and make up a new number for it.
          if(!unmarkedStates.Contains(next) && !markedStates.Contains(next))
          {
            unmarkedStates.Add(next);
            dfaStateNum.Add(next, GenNewState());
          }

          KeyValuePair<state, input> transition = new KeyValuePair<state, input>();
          transition.Key = dfaStateNum[aState];
          transition.Value = iE.Current;

          dfa.transTable[transition] = dfaStateNum[next];
        }
      }

      return dfa;
    }

    /// <summary>
    /// Builds the Epsilon closure of states for the given NFA
    /// </summary>
    /// <param name="nfa"></param>
    /// <param name="states"></param>
    /// <returns></returns>
    static Set<state> EpsilonClosure(NFA nfa, Set<state> states)
    {
      // Push all states onto a stack
      SCG.Stack<state> uncheckedStack = new SCG.Stack<state>(states);

      // Initialize EpsilonClosure(states) to states
      Set<state> epsilonClosure = states;

      while(uncheckedStack.Count != 0)
      {
        // Pop state t, the top element, off the stack
        state t = uncheckedStack.Pop();

        int i = 0;

        // For each state u with an edge from t to u labeled Epsilon
        foreach(input input in nfa.transTable[t])
        {
          if(input == (char)NFA.Constants.Epsilon)
          {
            state u = Array.IndexOf(nfa.transTable[t], input, i);

            // If u is not already in epsilonClosure, add it and push it onto stack
            if(!epsilonClosure.Contains(u))
            {
              epsilonClosure.Add(u);
              uncheckedStack.Push(u);
            }
          }

          i = i + 1;
        }
      }

      return epsilonClosure;
    }

    /// <summary>
    /// Creates unique state numbers for DFA states
    /// </summary>
    /// <returns></returns>
    private static state GenNewState()
    {
      return num++;
    }

  }
}

No comments:

Post a Comment