Code internal class TokenFactory { /// <summary> /// 产生记号对象 /// </summary> /// <param name="TokenWord">记号对应的字符串</param> /// <param name="Index">记号开始的列序号</param> /// <returns>记号对象</returns> protected static TokenRecord ProduceToken(string TokenWord, int Index) { throw new Exception("必须在子类中实现方法"); } /// <summary> /// 词法分析 /// </summary> /// <param name="TokenList">记号对象列表</param> /// <param name="Code">表达式</param> /// <param name="Index">列序号</param> public static void LexicalAnalysis(List<TokenRecord> TokenList, string Code, ref int Index) { char chrTemp;//临时字符 for (int intIndex = 0; intIndex < Code.Length; intIndex++) { chrTemp = Code[intIndex];//获取当前字符 //关键字分析 if (char.IsLetter(chrTemp))//如果当前字符为字母,进行关键字处理 { TokenKeywordFactory.LexicalAnalysis(TokenList, Code, ref intIndex); } else if (chrTemp.Equals('\'') || chrTemp.Equals('"'))//如果是字符串分隔符,进行字符串处理 { TokenStringFactory.LexicalAnalysis(TokenList, Code, ref intIndex); } else if (char.IsDigit(chrTemp))//数值处理 { TokenNumberFactory.LexicalAnalysis(TokenList, Code, ref intIndex); } else if (TokenSymbolFactory.GetSymbolDictionary().ContainsKey(chrTemp.ToString()))//运算符处理 { //有些运算符为双字符,但这里所有的双字符运算符的前一个字符都有对应的单字符运算符,可以只考虑一个字符 TokenSymbolFactory.LexicalAnalysis(TokenList, Code, ref intIndex); } else if (chrTemp.Equals(' ')) { //如果是空格,则忽略不处理 } else//错误处理 { //抛出错误 throw new SyntaxException(intIndex, 1, "包含不合法字符:" + chrTemp.ToString()); } }//for }//LexicalAnalysis /// <summary> /// 获取操作记号字典 /// </summary> /// <param name="OperateTokenType">操作记号类型</param> /// <returns>操作记号字典</returns> /// <remarks>Author:Alex Leo; Date:2008-5-19; Remark:基于本地文件TokenRecord.xml;</remarks> internal static SortedDictionary<string, string> GetOperateTokenDictionary(OperateTokenTypeEnum OperateTokenType) { SortedDictionary<string, string> myDictionary = new SortedDictionary<string, string>(); if (myDictionary.Count == 0) { XmlDocument myDoc = new XmlDocument(); myDoc.Load("./TokenRecord.xml"); XmlNodeList KeywordList = myDoc.SelectNodes(string.Format("TokenRecord/{0}/Token", Enum.GetName(typeof(OperateTokenTypeEnum),OperateTokenType))); foreach (XmlNode Node in KeywordList) { myDictionary.Add(Node.Attributes["Word"].Value, Node.Attributes["Class"].Value); } } return myDictionary; }//GetOperateTokenDictionary }//class TokenFactory
在LexicalAnalysis方法中,只需要判断当前字符是否符合一定的起始规则,如果符合起始规则,就交给对应的工厂类去处理。 下面用例子来讲吧,比如123.3*2-(24+34),分析成记号对象如下: 记号对象 | 对应表达式 |
TokenValue | 123.3 |
TokenMultiply | * |
TokenValue | 2 |
TokenMinus | - |
TokenLeftBracket | ( |
TokenValue | 24 |
TokenPlus | + |
TokenValue | 34 |
TokenRightBracket | ) |
Code <?xml version="1.0" encoding="utf-8" ?><TokenRecord> <TokenKeyword> <!--以下字符串处理函数记号对象--> <Token Word="mid" Class="TokenMid" Example="mid('abcdefg',2,3) = 'bcd'" /> <Token Word="left" Class="TokenLeft" Example="left('abcdefg',5) = 'abcde'" /> <Token Word="right" Class="TokenRight" Example="right('abcdefg',5) = 'cdefg'" /> <Token Word="string" Class="TokenToString" Example="string(53.6) = '53.6'" /> <!--以下为数学运算记号对象--> <Token Word="round" Class="TokenRound" Example="round(0.12345,3) = 0.123" /> <Token Word="abs" Class="TokenAbs" Example="abs(-5) = 5" /> <Token Word="max" Class="TokenMax" Example="max(3,5) = 5" /> <Token Word="min" Class="TokenMin" Example="min(3,5) = 3" /> <!--如果希望取余采用VB的Mod函数,形如Mod(5.4,2) = 1.4,将TokenMod修改为继承自TokenMethod即可,此时用%必须形如%(5.4,2)--> <!--<Token Word="mod" Class="TokenMod" Example="5.4 mod 2 = 1.4,mod后必须带空格" />--> <Token Word="pow" Class="TokenPow" Example="pow(2,3) = 8" /> <!--以下为三角函数记号对象,均采用角度计算而非弧度--> <Token Word="sin" Class="TokenSin" Example="sin(30) = 0.5" /> <Token Word="asin" Class="TokenAsin" Example="asin(0.5) = 30" /> <Token Word="cos" Class="TokenCos" Example="cos(60) = 0.5" /> <Token Word="acos" Class="TokenAcos" Example="acos(0.5) = 60" /> <Token Word="tan" Class="TokenTan" Example="tan(45) = 1" /> <Token Word="atan" Class="TokenAtan" Example="atan(1) = 45" /> <Token Word="atan2" Class="TokenAtan2" Example="atan2(30,30) = 45" /> <!--以下为逻辑运算记号对象,可以同时表示为关键字和运算符,因为它们的格式一致,都为true operate false--> <Token Word="and" Class="TokenAnd" Example="true and false = false" /> <Token Word="or" Class="TokenOr" Example="true or false = true" /> <Token Word="not" Class="TokenNot" Example="not true = false" /> <Token Word="xor" Class="TokenXor" Example="true xor false = true" /> <!--以下为常量记号对象--> <Token Word="pi" Class="TokenValue" Example="pi*1 = 3.1415926" /> <Token Word="e" Class="TokenValue" Example="e*1 = 2.71828" /> <Token Word="true" Class="TokenValue" Example="true and false = false" /> <Token Word="false" Class="TokenValue" Example="true and false = false" /> <!--以下为其他记号对象--> <Token Word="if" Class="TokenIf" Example="if(3>5,12,24) = 24" /> </TokenKeyword> <TokenSymbol> <!--以下为分隔符--> <Token Word="(" Class="TokenLeftBracket" Example="2*(5-3) = 4" /> <Token Word=")" Class="TokenRightBracket" Example="2*(5-3) = 4" /> <Token Word="," Class="TokenComma" Example="left('abcdefg',5) = 'abcde'" /> <!--以下为数学运算符--> <Token Word="+" Class="TokenPlus" Example="2+3 = 5 或 'abc' + 'def' = 'abcdef'" /> <Token Word="-" Class="TokenMinus" Example="5-3 = 2" /> <Token Word="*" Class="TokenMultiply" Example="3*4 = 12" /> <Token Word="/" Class="TokenDivide" Example="8/2 = 4" /> <Token Word="%" Class="TokenMod" Example="5.4%2 = 1.4" /> <!--如果希望求幂采用VB的^算法,形如2^3 = 8,将TokenPow修改为继承自TokenArithmetic即可,此时用pow则必须输入2 pow 3,pow后必须带空格--> <!--<Token Word="^" Class="TokenPow" Example="^(2,3) = 8" />--> <!--以下为比较运算符--> <Token Word="=" Class="TokenEqual" Example="if(3=5,12,24) = 24" /> <Token Word="==" Class="TokenEqual" Example="if(3==5,12,24) = 24" /> <Token Word="<>" Class="TokenNotEqual" Example="if(3<>5,12,24) = 12" /> <Token Word="!=" Class="TokenNotEqual" Example="if(3!=5,12,24) = 12" /> <Token Word=">" Class="TokenGreatThan" Example="if(3>5,12,24) = 24" /> <Token Word=">=" Class="TokenGreatOrEqual" Example="if(3>=5,12,24) = 24" /> <Token Word="<" Class="TokenLessThan" Example="if(3<5,12,24) = 12" /> <Token Word="<=" Class="TokenLessOrEqual" Example="if(3<=5,12,24) = 12" /> <!--以下为逻辑运算符,未定义短路操作,可自行实现--> <Token Word="!" Class="TokenNot" Example="!true = false" /> <Token Word="&" Class="TokenAnd" Example="true & false = false" /> <Token Word="&&" Class="TokenAnd" Example="true && false = false" /> <Token Word="|" Class="TokenOr" Example="true | false = true" /> <Token Word="||" Class="TokenOr" Example="true || false = true" /> </TokenSymbol></TokenRecord>
接下来介绍各个工厂类。首先是关键字工厂TokenKeywordFactory。当TokenFactory分析到英文字母的时候,把任务转交给TokenKeywordFactory。该类从分析得到的第一个英文字母开始向后分析,如果后面还是英文字母或者数字,则继续向后分析,否则结束分析。在这里关键字允许包含数字,但第一个字符必须是英文字母。分析结束后,截取分析得到的字符串,然后到交给ProduceToken方法产生一个TokenRecord类的实例。 在ProduceToken方法中,首先判断传入的字符串是否存在于关键字字典中,如果不存在则报错,如果存在则用反射产生一个对应类的实例。其中有些关键字是常量,所以进行了特殊处理,需要设置记号对象的值和类型。 以上工作完成后,调整分析指针的位置,回到TokenFactory类,执行后续分析。TokenKeywordFactory的代码如下: Code /// <summary> /// 关键字工厂 /// </summary> /// <remarks>Author:Alex Leo</remarks> internal class TokenKeywordFactory : TokenFactory { private static SortedDictionary<string, string> m_DictionaryKeyword = new SortedDictionary<string, string>(); /// <summary> /// 获取关键字字典 /// </summary> /// <returns>关键字字典</returns> /// <remarks>Author:Alex Leo; Date:2008-5-19;</remarks> internal static SortedDictionary<string, string> GetKeywordDictionary() { if (m_DictionaryKeyword.Count == 0) { m_DictionaryKeyword = TokenFactory.GetOperateTokenDictionary(OperateTokenTypeEnum.TokenKeyword); } return m_DictionaryKeyword; } /// <summary> /// 词法分析 /// </summary> /// <param name="TokenList">记号对象列表</param> /// <param name="Code">源表达式</param> /// <param name="Index">分析序号</param> /// <remarks>Author:Alex Leo</remarks> public static new void LexicalAnalysis(List<TokenRecord> TokenList, string Code, ref int Index) { int intIndexCurrent = Index;//当前序号 bool blnContinue = true; //string strTempChar = "";//获取临时字符 char chrTemp; string strTempWord = "";//获取临时字符串 while (blnContinue && intIndexCurrent < Code.Length) { chrTemp = Code[intIndexCurrent]; //关键字支持大小写字母及数字,但不允许以数字开头 if (char.IsLetter(chrTemp) || char.IsDigit(chrTemp)) { intIndexCurrent += 1;//把当前序号后移 } else { blnContinue = false; } } strTempWord = Code.Substring(Index, intIndexCurrent - Index);//获取临时词 TokenRecord Token = ProduceToken(strTempWord, Index); TokenList.Add(Token); Index = intIndexCurrent - 1;//设置指针到读取结束位置 } /// <summary> /// 产生记号对象 /// </summary> /// <param name="TokenWord">分析得到的单词</param> /// <param name="Index">当前序号</param> /// <returns>记号对象</returns> /// <remarks>Author:Alex Leo</remarks> protected static new TokenRecord ProduceToken(string TokenWord, int Index) { TokenRecord Token; if (GetKeywordDictionary().ContainsKey(TokenWord.ToLower())) { string strFullClassName = "ConExpress.Calculator." + GetKeywordDictionary()[TokenWord.ToLower()]; Type TokenType = Type.GetType(strFullClassName); Token = (TokenRecord)Activator.CreateInstance(TokenType, new object[] { Index, TokenWord.Length }); //对常数的特殊处理 switch (TokenWord.ToLower()) { case "pi"://以下为常量记号对象 Token.TokenValueType = typeof(double); Token.TokenValue = Math.PI; break; case "e": Token.TokenValueType = typeof(double); Token.TokenValue = Math.E; break; case "true": Token.TokenValueType = typeof(bool); Token.TokenValue = true; break; case "false": Token.TokenValueType = typeof(bool); Token.TokenValue = false; break; default: break; } } else { //错误字符串,抛出错误,语法错误 throw new SyntaxException(Index, TokenWord.Length, "未知表达式:" + TokenWord); } return Token; }//ProduceToken }//class TokenKeywordFactory
TokenSymbolFactory运算符工厂类的分析过程和TokenKeywordFactory基本相似。但是运算符一般只包含一个字符或者两个字符,就不需要一直向后分析了,只需要判断运算符字典中是否存在对应的项即可。另外有些操作符的第一个字符是重复的,比如“>”和“>=”,这时候就需要判断“>”之后是否还存在“=”。如果向后再截取一个字符,在字典中也存在对应项,则按双字符运算符处理,否则就是单字符运算符。TokenSymbolFactory的代码如下: Code /// <summary> /// 运算符工厂 /// </summary> /// <remarks>Author:Alex Leo</remarks> class TokenSymbolFactory : TokenFactory { private static SortedDictionary<string, string> m_DictionarySymbol = new SortedDictionary<string, string>(); /// <summary> /// 获取运算符字典 /// </summary> /// <returns>运算符字典</returns> /// <remarks>Author:Alex Leo; Date:2008-5-19;</remarks> internal static SortedDictionary<string, string> GetSymbolDictionary() { if (m_DictionarySymbol.Count == 0) { m_DictionarySymbol = TokenFactory.GetOperateTokenDictionary(OperateTokenTypeEnum.TokenSymbol); } return m_DictionarySymbol; } /// <summary> /// 词法分析 /// </summary> /// <param name="TokenList">记号对象列表</param> /// <param name="Code">源表达式</param> /// <param name="Index">分析序号</param> /// <remarks>Author:Alex Leo</remarks> public new static void LexicalAnalysis(List<TokenRecord> TokenList, string Code, ref int Index) { string strTempWord; if ((Index < Code.Length - 1) && GetSymbolDictionary().ContainsKey(Code.Substring(Index, 2)))//如果是双字节操作符 { strTempWord = Code.Substring(Index, 2); Index += 1;//指针后移一位 } else { strTempWord = Code.Substring(Index, 1); } TokenRecord Token = ProduceToken(strTempWord, Index); TokenList.Add(Token); } /// <summary> /// 产生记号对象 /// </summary> /// <param name="TokenWord">分析得到的单词</param> /// <param name="Index">当前序号</param> /// <returns>记号对象</returns> /// <remarks>Author:Alex Leo</remarks> protected new static TokenRecord ProduceToken(string TokenWord, int Index) { TokenRecord Token; if (GetSymbolDictionary().ContainsKey(TokenWord.ToLower())) { string strFullClassName = "ConExpress.Calculator." + GetSymbolDictionary()[TokenWord.ToLower()]; Type TokenType = Type.GetType(strFullClassName); Token = (TokenRecord)Activator.CreateInstance(TokenType, new object[] { Index, TokenWord.Length }); } else { //错误字符串,抛出错误,语法错误 throw new SyntaxException(Index, TokenWord.Length, "未知表达式:" + TokenWord); } return Token; }//ProduceToken }//class TokenSymbolFactory
TokenStringFactory字符串工厂类的分析是按照VB语法,只有引号需要转义,用两个引号即可,其他特殊字符不需要转义。而且引号和JavaScript一样,可以用大写或者小写,只要两端匹配就可以了。如果在单引号标记的字符串中包含双引号,则不需要转义,双引号标记的字符串中出现单引号也不需要转义。字符串的分析是以引号作为界定,向后分析的过程中,如果遇到与起始引号相同的字符,判断后面是否存在重复引号,存在则转义,不存在则结束分析。将两个引号之间的字符串截取,创建一个TokenValue对象,结束分析。TokenStringFactory代码如下: Code /// <summary> /// 字符串工厂 /// </summary> /// <remarks>Author:Alex Leo</remarks> class TokenStringFactory : TokenFactory { /// <summary> /// 词法分析 /// </summary> /// <param name="TokenList">记号对象列表</param> /// <param name="Code">源表达式</param> /// <param name="Index">分析序号</param> /// <remarks>Author:Alex Leo</remarks> public new static void LexicalAnalysis(List<TokenRecord> TokenList, string Code, ref int Index) { string strQuotationMark = Code.Substring(Index, 1);//引号,可以是单引号,也可以是双引号 int intIndexCurrent = Index + 1;//指向后一个字符 string strTempChar = "";//临时字符 StringBuilder strTempWord = new StringBuilder();//临时字符串 bool blnContinue = true; while (blnContinue && intIndexCurrent < Code.Length)//循环直到标志位置False或超出长度 { strTempChar = Code.Substring(intIndexCurrent, 1); if (strTempChar.Equals(strQuotationMark))//如果是字符串分隔符 { if ((intIndexCurrent < Code.Length - 1) && Code.Substring(intIndexCurrent + 1, 1).Equals(strQuotationMark))//如果后面还是引号,则进行转义,将两个引号转义为一个引号字符 { strTempWord.Append(strQuotationMark);//添加一个引号字符到临时字符串中 intIndexCurrent += 2;//向后移两位 } else { blnContinue = false;//遇到字符串结束引号,退出 } } else { strTempWord.Append(strTempChar);//添加当前字符到临时字符串中 intIndexCurrent += 1;//向后移一位 }//if }//while TokenRecord Token = ProduceToken(strTempWord.ToString(), Index); TokenList.Add(Token); Index = intIndexCurrent;//指针移到当前指针,即字符串结束引号 }//LexicalAnalysis /// <summary> /// 产生记号对象 /// </summary> /// <param name="TokenWord">分析得到的单词</param> /// <param name="Index">当前序号</param> /// <returns>记号对象</returns> /// <remarks>Author:Alex Leo</remarks> public new static TokenRecord ProduceToken(string TokenWord, int Index) { TokenRecord Token = new TokenValue(Index, TokenWord.Length); Token.TokenValue = TokenWord; Token.TokenValueType = typeof(string); return Token; } }//class TokenStringFactory
TokenNumberFactory数值工厂的分析最简单,主要是判断下一个字符是否是数字或者小数点。将分析得到的字符串转换为double类型,然后赋值给创建的TokenValue对象即可。TokenNumberFactory代码如下: Code /// <summary> /// 数值工厂 /// </summary> /// <remarks>Author:Alex Leo</remarks> class TokenNumberFactory : TokenFactory { /// <summary> /// 词法分析 /// </summary> /// <param name="TokenList">记号对象列表</param> /// <param name="Code">源表达式</param> /// <param name="Index">分析序号</param> /// <remarks>Author:Alex Leo</remarks> public new static void LexicalAnalysis(List<TokenRecord> TokenList, string Code, ref int Index) { int intIndexCurrent = Index;//指向后一个字符 bool blnContinue = true; char chrTemp; string strTempWord; while (blnContinue && intIndexCurrent < Code.Length) { chrTemp = Code[intIndexCurrent]; if (char.IsDigit(chrTemp) || chrTemp.Equals('.')) { intIndexCurrent += 1; } else { blnContinue = false; } }//while strTempWord = Code.Substring(Index, intIndexCurrent - Index);//取得临时字符串 TokenRecord Token = ProduceToken(strTempWord, Index); TokenList.Add(Token); Index = intIndexCurrent - 1;//指针移到当前指针的前一位,然后赋值给循环指针 }//LexicalAnalysis /// <summary> /// 产生记号对象 /// </summary> /// <param name="TokenWord">分析得到的单词</param> /// <param name="Index">当前序号</param> /// <returns>记号对象</returns> /// <remarks>Author:Alex Leo</remarks> public new static TokenRecord ProduceToken(string TokenWord, int Index) { TokenRecord Token = new TokenValue(Index + 1, TokenWord.Length); Token.TokenValueType = typeof(double); double dblValue; if (double.TryParse(TokenWord, out dblValue)) { Token.TokenValue = dblValue; } else { throw new SyntaxException(Index, TokenWord.Length, "表达式 " + TokenWord + " 无法转换成数值。"); } return Token; }//ProduceToken }//class TokenNumberFactory
通过这些“工厂”类就可以完成把表达式分析成记号对象列表,为程序理解表达式做好了基础工作。在下一篇中会介绍如何将记号对象分析成树视图,进而通过树视图逐级求解。 代码下载: