- UID
- 6266
- 精华
- 积分
- 802
- 威望
- 点
- 宅币
- 个
- 贡献
- 次
- 宅之契约
- 份
- 最后登录
- 1970-1-1
- 在线时间
- 小时
|
以下程序代码实现了将一个正则表达式转换为FSA(Finite-State Automation 有穷状态自动机)的功能。
本程序需要https://github.com/coshcage/StoneValley库的支持。
程序运行后需要输入一个合法的正则表达式,例如 a(ba|ca)|ac|ab) ,程序的输出是该正则表达式的逆波兰形式和转移表。
注意,输入的正则表达式支持‘(’、‘)’、‘|’和‘*’运算。+和?需要自行转换。
a+ == aa*
a? == a | epsilon
在输出的转移表内‘>’表示开始状态,'*'表示结束状态。(数字)为状态,‘\数字’为输入符号。‘\0’表示空转移。
该程序算法的大致原理是:
1、将正则表达式转换为相对应的逆波兰式。
2、扫描逆波兰式,将输入符号取出后转换为状态+输入符号的图。将图的两个节点压入栈内。
3、将运算符取出,并取出相应的图。在相应的图上完成运算后再将图的两个节点压回栈内。
4、对于连接运算,将栈内存储的图节点取出一一做连接运算后压入栈内。
5、最后从站内取出的节点组合就是该自动机的起始节点和结束节点。
该程序未实现自动机的化简。
如果需要化简自动机,需要自行实现删除空环路、删除空变迁、自动机确定化以及自动机约简四个步骤。
有了转移表,就可以使用表驱动的方法运行自动机,识别正则语言。
因为本人水平有限,不到位之处还请大家指出。
- #include <stdio.h>
- #include <stdlib.h>
- #include <string.h>
- #include "svgraph.h"
- #include "svstack.h"
- #include "svqueue.h"
- STACK_L stkOperator, stkOperand;
- QUEUE_L queRegexp;
- GRAPH_L machine;
- struct st_Operator
- {
- char name;
- char level;
- } operators[] = { '(', -1, ')', -1, '*', 2, '|', 1 };
- typedef struct st_Element
- {
- union un_Data
- {
- char operator;
- char operand;
- } data;
- BOOL isOperator;
- } Element, * P_Element;
- typedef struct st_Node
- {
- size_t x;
- size_t y;
- } Node, * P_Node;
- /* Finding info for edges. */
- typedef struct _st_FIEDG {
- EDGE vertex;
- P_NODE_S pnode;
- size_t bweight; /* TRUE for weighted graph; FALSE for unweighted graph. */
- } _FIEDG, * _P_FIEDG;
- int cbftvsFindMaxData(void * pitem, size_t param)
- {
- if (*(size_t *)((P_TNODE_B)pitem)->pdata > * (size_t *)param)
- *(size_t *)param = *(size_t *)((P_TNODE_B)pitem)->pdata;
- return CBF_CONTINUE;
- }
- size_t GenerateUniqueState(P_GRAPH_L pgrp)
- {
- if (setIsEmptyT(pgrp))
- return 1;
- {
- size_t r = 0;
- setTraverseT(pgrp, cbftvsFindMaxData, (size_t)&r, ETM_LEVELORDER);
- return r + 1;
- }
- }
- char GetOperator(char o)
- {
- int i;
- for (i = 0; i < sizeof operators / sizeof operators[0]; ++i)
- if (operators[i].name == o)
- return operators[i].level;
- return 0;
- }
- int Splitter(char * pstr)
- {
- BOOL turn = FALSE;
- char c;
- Element ele;
- while (*pstr != '\0')
- {
- switch (*pstr)
- {
- case '\\':
- if (turn)
- {
- ele.data.operand = *pstr;
- ele.isOperator = FALSE;
- queInsertL(&queRegexp, &ele, sizeof(Element));
- turn = FALSE;
- break;
- }
- turn = TRUE;
- break;
- case '(':
- case ')':
- case '|':
- case '*':
- if (turn)
- {
- ele.data.operand = *pstr;
- ele.isOperator = FALSE;
- queInsertL(&queRegexp, &ele, sizeof(Element));
- }
- else
- {
- switch (*pstr)
- {
- case '(':
- break;
- case ')':
- if (! stkIsEmptyL(&stkOperator))
- {
- stkPeepL(&c, sizeof(char), &stkOperator);
- while (c != '(')
- {
- stkPopL(&c, sizeof(char), &stkOperator);
- ele.data.operand = c;
- ele.isOperator = TRUE;
- queInsertL(&queRegexp, &ele, sizeof(Element));
- if (stkIsEmptyL(&stkOperator))
- break;
- else
- stkPeepL(&c, sizeof(char), &stkOperator);
- }
- stkPopL(&c, sizeof(char), &stkOperator); /* Drop right brace. */
- }
- else
- {
- printf("Right brace mismatch.\n\n");
- return 1;
- }
- goto Lbl_PassOperator;
- default:
- if (! stkIsEmptyL(&stkOperator))
- {
- stkPeepL(&c, sizeof(char), &stkOperator);
- while (GetOperator(c) >= GetOperator(*pstr))
- {
- stkPopL(&c, sizeof(char), &stkOperator);
- ele.data.operand = c;
- ele.isOperator = TRUE;
- queInsertL(&queRegexp, &ele, sizeof(Element));
- if (stkIsEmptyL(&stkOperator))
- break;
- else
- stkPeepL(&c, sizeof(char), &stkOperator);
- }
- }
- break;
- }
- stkPushL(&stkOperator, pstr, sizeof(char));
- }
- Lbl_PassOperator:
- turn = FALSE;
- break;
- default:
- ele.data.operand = *pstr;
- ele.isOperator = FALSE;
- queInsertL(&queRegexp, &ele, sizeof(Element));
- turn = FALSE;
- break;
- }
- ++pstr;
- }
- while (! stkIsEmptyL(&stkOperator))
- {
- stkPopL(&c, sizeof(char), &stkOperator);
- if (c == '(')
- {
- puts("Left brace mismatch.\n");
- return 1;
- }
- ele.data.operand = c;
- ele.isOperator = TRUE;
- queInsertL(&queRegexp, &ele, sizeof(Element));
- }
- if (queIsEmptyL(&queRegexp))
- {
- puts("Invalid Expression.\n");
- return 1;
- }
- return 0;
- }
- /*
- * _______ __a__
- * | | | V
- * (A) a (B) ====> (A) (B)
- * |_____|
- */
- Node CombineNode5(char transition)
- {
- Node node;
- node.x = GenerateUniqueState(&machine);
- grpInsertVertexL(&machine, node.x);
- node.y = GenerateUniqueState(&machine);
- grpInsertVertexL(&machine, node.y);
- grpInsertEdgeL(&machine, node.x, node.y, (size_t)transition);
- return node;
- }
- /*
- * ________ __x__ __e__ __y__
- * | | | V/ V/ V
- * (A) xy (B) ====> (A) (C) (D) (B)
- * |______|
- */
- Node CombineNode1(Node nodeA, Node nodeB)
- {
- Node nodeR;
- grpInsertEdgeL(&machine, nodeA.y, nodeB.x, 0); /* Epsilon transition. */
- nodeR.x = nodeA.x;
- nodeR.y = nodeB.y;
- return nodeR;
- }
- /*
- * _________ __e__ __x__ __e__
- * | | | V/ V/ V
- * (A) x|y (B) ====> (A) (C) (D) (B)
- * |_______| | __y___ ^
- * \ | | |
- * \e->(E) (F)--e-/
- */
- Node CombineNode2(Node nodeA, Node nodeB)
- {
- Node nodeR;
- nodeR.x = GenerateUniqueState(&machine);
- grpInsertVertexL(&machine, nodeR.x);
- nodeR.y = GenerateUniqueState(&machine);
- grpInsertVertexL(&machine, nodeR.y);
- grpInsertEdgeL(&machine, nodeR.x, nodeA.x, 0); /* Epsilon transition. */
- grpInsertEdgeL(&machine, nodeR.x, nodeB.x, 0);
- grpInsertEdgeL(&machine, nodeA.y, nodeR.y, 0);
- grpInsertEdgeL(&machine, nodeB.y, nodeR.y, 0);
- return nodeR;
- }
- /*
- * _________ __e__ __x__ __e__
- * | | | V/ V/ V
- * (A) x|y (B) ====> (A) (C) (D) (B)
- * |_______| ^\_______e____>____/|
- * \________e____<_____/
- */
- Node CombineNode3(Node nodeA)
- {
- Node nodeR;
- nodeR.x = GenerateUniqueState(&machine);
- grpInsertVertexL(&machine, nodeR.x);
- nodeR.y = GenerateUniqueState(&machine);
- grpInsertVertexL(&machine, nodeR.y);
- grpInsertEdgeL(&machine, nodeR.x, nodeA.x, 0);
- grpInsertEdgeL(&machine, nodeA.y, nodeR.y, 0);
- grpInsertEdgeL(&machine, nodeR.x, nodeR.y, 0);
- grpInsertEdgeL(&machine, nodeR.y, nodeR.x, 0);
- return nodeR;
- }
- int cbftvsPrint(void * pitem, size_t param)
- {
- P_Element pele = (P_Element)((P_NODE_S)pitem)->pdata;
- if (! pele->isOperator)
- printf(" (%c)", pele->data.operand);
- else
- printf(" %c", pele->data.operator);
- return CBF_CONTINUE;
- }
- //int CBFFindEdgeInList(void * pitem, size_t param)
- //{
- // _P_FIEDG pd = (_P_FIEDG)param;
- // if (((P_EDGE)((P_NODE_S)pitem)->pdata)->weight == pd->vertex.weight)
- // {
- // pd->vertex.vid = ((P_EDGE)((P_NODE_S)pitem)->pdata)->vid;
- // pd->pnode = (P_NODE_S)pitem;
- // return CBF_TERMINATE;
- // }
- // return CBF_CONTINUE;
- //}
- //
- //extern P_VERTEX_L _grpGetVertexByID(P_GRAPH_L pgrp, size_t vid);
- //
- //ptrdiff_t FSA_Status(P_GRAPH_L pgrp, size_t vidx, char transition)
- //{
- // REGISTER P_VERTEX_L pvtx;
- // if (NULL != (pvtx = _grpGetVertexByID(pgrp, vidx)))
- // {
- // _FIEDG fd;
- // extern int _grpCBFFindEdgeInList(void * pitem, size_t param);
- // fd.pnode = NULL;
- // fd.vertex.weight = transition;
- //
- // if (CBF_TERMINATE == strTraverseLinkedListSC_X(pvtx->adjlist, NULL, CBFFindEdgeInList, (size_t)&fd))
- // return fd.vertex.vid; /* Edge already exists. */
- // }
- // return -1; /* Can not find vertex vidx. */
- //}
- int cbftvsvtxedg(void * pitem, size_t param)
- {
- P_NODE_S pnode = (P_NODE_S)pitem;
- P_EDGE pedge = (P_EDGE)pnode->pdata;
- printf("\t\'\\%lu\'\t(%lu)\n", pedge->weight, pedge->vid);
- return CBF_CONTINUE;
- }
- int cbftvsgrp(void * pitem, size_t param)
- {
- P_TNODE_B tnodeb = P2P_TNODE_B(pitem);
- P_VERTEX_L pvtx = (P_VERTEX_L)tnodeb->pdata;
- P_Node pnode = (P_Node)param;
- if (pvtx->vid == pnode->x)
- printf(">(%lu)\n", pvtx->vid); /* Start status. */
- else if (pvtx->vid == pnode->y)
- printf("*(%lu)\n", pvtx->vid); /* End status. */
- else
- printf(" (%lu)\n", pvtx->vid);
- strTraverseLinkedListSC_A(pvtx->adjlist, NULL, cbftvsvtxedg, 0);
- return CBF_CONTINUE;
- }
- int main(void)
- {
- Node node;
- STACK_L operandStack;
- //char exp[] = { "a(a|b)*" };
- char exp[BUFSIZ] = { 0 };
- stkInitL(&stkOperator);
- stkInitL(&stkOperand);
- queInitL(&queRegexp);
- stkInitL(&operandStack);
- grpInitL(&machine);
- printf("Regexp< a(a|d)* >: ");
- scanf_s("%s", exp, BUFSIZ);
- Splitter(exp);
- printf("RPN: ");
- strTraverseLinkedListSC_N(queRegexp.pfront, NULL, cbftvsPrint, 0);
- printf("\n");
- while (!queIsEmptyL(&queRegexp))
- {
- Element ele;
- queRemoveL(&ele, sizeof ele, &queRegexp);
- if (ele.isOperator)
- {
- Node nodeA, nodeB;
- switch (ele.data.operator)
- {
- case '|':
- if (!stkIsEmptyL(&operandStack))
- stkPopL(&nodeA, sizeof nodeA, &operandStack);
- else
- {
- printf("Expression Error!\n");
- return 1;
- }
- if (!stkIsEmptyL(&operandStack))
- stkPopL(&nodeB, sizeof nodeB, &operandStack);
- else
- {
- printf("Expression Error!\n");
- return 2;
- }
- nodeA = CombineNode2(nodeA, nodeB);
- stkPushL(&operandStack, &nodeA, sizeof nodeA);
- break;
- case '*':
- if (!stkIsEmptyL(&operandStack))
- stkPopL(&nodeA, sizeof nodeA, &operandStack);
- else
- {
- printf("Expression Error!\n");
- return 3;
- }
- nodeA = CombineNode3(nodeA);
- stkPushL(&operandStack, &nodeA, sizeof nodeA);
- break;
- }
- }
- else
- {
- node = CombineNode5(ele.data.operand);
- stkPushL(&operandStack, &node, sizeof node);
- }
- }
- /* Deduction: */
- while (!stkIsEmptyL(&operandStack))
- {
- Node nodeA, nodeB;
- stkPopL(&nodeA, sizeof nodeA, &operandStack);
- if (!stkIsEmptyL(&operandStack))
- stkPopL(&nodeB, sizeof nodeB, &operandStack);
- else
- {
- stkPushL(&operandStack, &nodeA, sizeof nodeA);
- break;
- }
- nodeA = CombineNode1(nodeA, nodeB);
- stkPushL(&operandStack, &nodeA, sizeof nodeA);
- }
- /* Pour the last item out of the stack. */
- if (!stkIsEmptyL(&operandStack))
- stkPopL(&node, sizeof node, &operandStack);
- else
- {
- printf("Expression Error!\n");
- return 4;
- }
- printf("Status\tTransition\tStatus\n");
- treTraverseBIn(machine, cbftvsgrp, &node);
- stkFreeL(&stkOperator);
- stkFreeL(&stkOperand);
- queFreeL(&queRegexp);
- stkFreeL(&operandStack);
- grpFreeL(&machine);
- return 0;
- }
复制代码 |
|