Читайте данную работу прямо на сайте или скачайте
Препроцессор языка СИ
Лабораторная работа 1
Задание:
Построить лексический анализатор (сканер), который будет в
дальнейшем использоваться при разборе HTML-документов.
Текст программы:
#include <io.h>
#include <ctype.h>
#include <string.h>
#include <alloc.h>
#include "parser\htm_cnst.h"
#include "parser\htm_glob.h"
#include "parser\htm_tokn.h"
#include "parser\htm_err.h"
#include "parse.h"
#pragma hdrstop
/* Semantic value of the current token; defined outside this file. */
extern YYSTYPE yylval;

/* Scanner states.  states[] below lists their names in the same order. */
enum {TEXT=0, PRE, KEYWORD, ATTR, AVALUE, IN_QUOTE};
/* text, preformatted text, HTML KEYWORD, attribute KEYWORD, */
/* attribute value KEYWORD, inside a quoted attribute value  */
char *states [ 6] = {"TEXT", "PRE", "KEYWORD", "ATTR", "AVALUE", "IN_QUOTE"};
/* Printable token-type names (presumably for diagnostics; not used in the
 * code visible in this listing). */
char *tktypes[10] = {"_OpenTag", "_CloseTag", "_EndTag", "_C_KEYWORD",
                     "_S_KEYWORD", "_A_KEYWORD", "_V_KEYWORD", "_NUM",
                     "_IDENTIFIER", "_QUOTED_ATTR"};

int in_close   = FALSE;   /* set by nexttok() after "</" was returned      */
int in_open    = FALSE;   /* set by nexttok() after "<" was returned       */
int opened_par = FALSE;   /* maintained by yylex(): a paragraph is open    */
int state      = TEXT;    /* current scanner state                         */
int old_state  = TEXT;    /* state saved when a tag starts                 */
int cUKSZ = 0;
int USE_BUFFER = FALSE;   /* yylex() is replaying tokens from lex_buff     */
int lex_buff_size = 0;    /* number of tokens still queued in lex_buff     */

/* One queued token: its type and its text. */
typedef struct {
    int  tktyp;
    char tkval[NMSZ];
} tbuff;

/* Small stack of tokens that yylex() hands out before reading new input. */
tbuff lex_buff[5];

int  c;        /* look-ahead character; always one char ahead of the scanner */
int  lineno;
long charno;   /* incremented by nexttok() for every character consumed */
long f_size;
int  icm;
/* NOTE(review): the original listing flags this buffer with "!!!!!!";
 * the reason is not visible here. */
char comment[CMSZ];

/* Entry of a sorted keyword table searched by bsearch(). */
typedef struct {
    char name[TKSZ];     /* keyword text                                   */
    int  kw_token;       /* token type returned for this keyword           */
    int  in_paragraph;   /* flag used by yylex(); presumably "allowed inside
                            a paragraph" - TODO confirm                   */
} kw_table;

/********** functions declarations ************/
void fixfile(FILE *, char*);
int  nextchar(FILE *, FILE *);
void nlproc (FILE *);
int  bsearch (char *, kw_table *, int);
int  nexttok (char *);
void lexinit();
int  yylex();
/**********************************************/
/* Possible KEYWORDS - directives */
kw_table keyword_table[KWSZ] = {
- 2 -
{"A", _C_KEYWORD, TRUEа },
//...
{"WBR", _S_KEYWORD, TRUEа }
};
/* Possible KEYWORDS - attributes */
kw_table attr_table[ATSZ] = {
{"ALIGN", _A_KEYWORD },
//...
{"WRAP", _A_KEYWORD }
};
/* Possible KEYWORDS - attribute's values */
kw_table aval_table[AVSZ] = {
{"ABSBOTTOM", _V_KEYWORD },
//...
{"_top", _V_KEYWORD }
};
/*
 * Functions
 */
/*
 * Binary search for a name in a table of names.  Returns the index of the
 * matching element, or -1 when nothing was found.
 * The table consists of records holding a NUL-terminated ASCII string (an
 * HTML keyword) and the int token type of that keyword; it must be sorted
 * in strcmp() order.
 */
int bsearch( char word[], kw_table word_table[], int tbsize)
{
    int first = 0;
    int last  = tbsize - 1;

    for (;;) {
        int probe, order;

        if (first > last)
            return -1;              /* range exhausted: no such name */

        probe = (first + last) / 2;
        order = strcmp(word, word_table[probe].name);
        if (order == 0)
            return probe;
        if (order > 0)
            first = probe + 1;      /* word sorts after the probe */
        else
            last = probe - 1;       /* word sorts before the probe */
    }
}
/*
 * nexttok - read the next raw token from infp.
 *
 * val  receives the token text as a NUL-terminated string; at most NMSZ
 *      bytes are written, so the caller must supply at least that much.
 *
 * Returns the token type (_OpenTag, _CloseTag, a keyword token taken from
 * the tables, _IDENTIFIER, _QUOTED_ATTR, or '"'), or 0 when no branch
 * applies.
 *
 * The global c always holds the look-ahead character.  The function drives
 * the global state machine (state / old_state / in_open / in_close).
 *
 * NOTE(review): only the TEXT/PRE tag-start, KEYWORD and IN_QUOTE states are
 * handled in this listing; ATTR, AVALUE and plain text fall through to
 * "return 0" - the published listing appears to be abridged.
 */
int nexttok(char *val)
{
    char tokenvalue[NMSZ];
    int  i = 0;
    int  c1, tokentype;

    /* Skip white space; c is always one char ahead. */
    while (isspace(c)) {
        if (c == '\n') nlproc(listfp);
        if (state == PRE) {
            /* In preformatted text each white-space char is its own token. */
            tokenvalue[0] = c; tokenvalue[1] = '\0';
            tokentype = _IDENTIFIER;
            c = nextchar(infp, listfp); charno++;
            strcpy(val, tokenvalue); return(tokentype);
        }
        c = nextchar(infp, listfp); charno++;
    }

    /* Start of a tag: "<" or "</". */
    if ( ( (state == TEXT) || (state == PRE) ) && (c == '<') ) {
        ungetc(c1 = getc(infp), infp);      /* peek at the char after '<' */
        old_state = state; state = KEYWORD;
        if (c1 == '/') {
            c = nextchar(infp, listfp); charno++;   /* consume '/'          */
            c = nextchar(infp, listfp); charno++;   /* load next look-ahead */
            tokenvalue[0] = '<'; tokenvalue[1] = '/'; tokenvalue[2] = '\0';
            tokentype = _CloseTag; in_close = TRUE; in_open = FALSE;
        } else {
            c = nextchar(infp, listfp); charno++;
            tokenvalue[0] = '<'; tokenvalue[1] = '\0';
            tokentype = _OpenTag; in_close = FALSE; in_open = TRUE;
        }
        strcpy(val, tokenvalue); return(tokentype);
    }

    if (state == KEYWORD) {
        if (c == '!') { /* Comments! */
            /* Skip to the closing '>' (left in c) or to end of input. */
            while ((c != '>') && ( c != EOF )) {
                c = nextchar(infp, listfp); charno++;
            }
            state = ATTR;
            tokenvalue[0] = '\0'; tokentype = _IDENTIFIER;
            strcpy(val, tokenvalue); return(tokentype);
        }
        /* Collect the upper-cased word; characters that do not fit into
         * tokenvalue are consumed but dropped instead of overrunning it. */
        while (isalnum(c)) {
            if (i < NMSZ - 1)
                tokenvalue[i++] = toupper(c);
            c = nextchar(infp, listfp); charno++;
        }
        tokenvalue[i] = '\0';

        if ((i = bsearch(tokenvalue, keyword_table, KWSZ)) >= 0) {
            tokentype = keyword_table[i].kw_token;
            state = ATTR;
            if ( strcmp("PRE", tokenvalue) == 0 ) {
                /* </PRE> returns to normal text, <PRE> enters PRE mode */
                if (in_close) old_state = TEXT;
                else          old_state = PRE;
            }
            strcpy(val, tokenvalue); return(tokentype);
        }
        if ((i = bsearch(tokenvalue, attr_table, ATSZ)) >= 0) {
            tokentype = attr_table[i].kw_token;
            state = AVALUE;
            strcpy(val, tokenvalue); return(tokentype);
        }
        /* Unknown attribute. Actually, it's much easier to just ignore it */
        /* in YACC than to try to skip it here.                            */
        tokentype = _IDENTIFIER;
        state = AVALUE;
        strcpy(val, tokenvalue); return(tokentype);
    }

    if (state == IN_QUOTE) {
        if ( c == '\"' ) {
            /* Closing quotation mark: returned as a token of its own. */
            c = nextchar(infp, listfp); charno++;
            state = ATTR;
            tokenvalue[0] = '\"'; tokenvalue[1] = '\0';
            tokentype = '\"';
            strcpy(val, tokenvalue); return(tokentype);
        }
        tokentype = _QUOTED_ATTR; /* maybe URL, maybe rain, maybe snow... */
        /* Copy up to the closing quote (left in c).  Stop at end of input
         * as well, and never write past the end of tokenvalue. */
        while ( (c != '\"') && (c != EOF) ) {
            if (i < NMSZ - 1)
                tokenvalue[i++] = c;
            c = nextchar(infp, listfp); charno++;
        }
        tokenvalue[i] = '\0';
        strcpy(val, tokenvalue); return(tokentype);
    } /* end if for (state == IN_QUOTE) */

    return 0; /* This cannot happen... */
}
/*
 * yylex - token source for the parser.
 *
 * When no tokens are queued (USE_BUFFER is clear) one token is read with
 * nexttok() and, depending on its type, a sequence of tokens is stored in
 * lex_buff[].  Queued tokens are handed out from the END of lex_buff, so the
 * entry with the highest index is returned first and entry 0 last.
 * The text of the returned token is copied into memory obtained from
 * malloc() and stored in yylval.pchar; this function never frees it.
 *
 * NOTE(review): many statements below are cut off with "..." in the
 * published listing, and "- 5 -" is a page number of the source document;
 * the function cannot be compiled as printed.
 */
int yylex()
{
intа tktyp;
char tkval[NMSZ];
if (!USE_BUFFER) {
tktyp = nexttok(tkval);
switch (tktyp) {
/* "<" seen: read the tag name that follows it. */
case _OpenTag: {
tktyp = nexttok(tkval);
if (opened_par) {
if (tktyp == _C_KEYWORD || tktyp == _S_KEYWORD) {
/* Condition truncated in the listing; presumably tests the in_paragraph
 * flag of the keyword - TODO confirm. */
а if (!keyword_table[bsearch(tkval, keyword_table, KW...
/* Five tokens, returned in the order [4]..[0]:
 * _CloseTag, _C_KEYWORD, _EndTag, _OpenTag, then the tag just read. */
lex_buff[0].tktyp = tktyp а ; strcpy(lex_buff[...
lex_buff[1].tktyp = _OpenTagа ; strcpy(lex_buff[...
lex_buff[2].tktyp = _EndTag ; strcpy(lex_buff[...
lex_buff[3].tktyp = _C_KEYWORD; strcpy(lex_buff[...
lex_buff[4].tktyp = _CloseTag ; strcpy(lex_buff[...
lex_buff_size = 5; USE_BUFFER = TRUE;
opened_par = (strcmp(tkval, "P")==0);
а } else {
lex_buff[0].tktyp = tktyp а ; strcpy(lex_buff[...
lex_buff[1].tktyp = _OpenTagа ; strcpy(lex_buff[...
lex_buff_size = 2; USE_BUFFER = TRUE;
а }
}
/* NOTE(review): when opened_par is set and the token is not a C/S keyword
 * nothing is queued, so the "<" token itself is not returned - confirm. */
} else { // i.e. opened_par == FALSE
opened_par = (strcmp(tkval, "P")==0);
lex_buff[0].tktyp = tktyp ; strcpy(lex_buff[0].tkv...
lex_buff[1].tktyp = _OpenTagа ; strcpy(lex_buff[1].tkv...
lex_buff_size = 2; USE_BUFFER = TRUE;
}
break;
}
/* "</" seen: read the tag name that follows it. */
case _CloseTag: {
tktyp = nexttok(tkval);
if (opened_par) {
/* NOTE(review): bsearch() returns -1 for an unknown name and tktyp is not
 * checked here, so this may index keyword_table[-1] - confirm against the
 * full (untruncated) source. */
if (keyword_table[bsearch(tkval, keyword_table, KWSZ)]...
а lex_buff[0].tktyp = tktyp ; strcpy(lex_buff[0]....
а lex_buff[1].tktyp = _CloseTag ; strcpy(lex_buff[1]....
а lex_buff_size = 2; USE_BUFFER = TRUE;
} else {
а if (strcmp(tkval, "P")==0) {
/* Explicit </P>: pass it through and mark the paragraph closed. */
lex_buff[0].tktyp = tktyp а ; strcpy(lex_buff[...
lex_buff[1].tktyp = _CloseTag ; strcpy(lex_buff[...
lex_buff_size = 2; USE_BUFFER = TRUE;
opened_par = FALSE;
а } else {
/* Five tokens, returned in the order [4]..[0]:
 * _CloseTag, _C_KEYWORD, _EndTag, _CloseTag, then the tag just read. */
lex_buff[0].tktyp = tktyp а ; strcpy(lex_buff[...
lex_buff[1].tktyp = _CloseTag ; strcpy(lex_buff[...
lex_buff[2].tktyp = _EndTag ; strcpy(lex_buff[...
lex_buff[3].tktyp = _C_KEYWORD; strcpy(lex_buff[...
lex_buff[4].tktyp = _CloseTag ; strcpy(lex_buff[...
а lex_buff_size = 5; USE_BUFFER = TRUE;
opened_par = FALSE;
а }
}
} else {
lex_buff[0].tktyp = tktyp ; strcpy(lex_buff[0].tkv...
lex_buff[1].tktyp = _CloseTag ; strcpy(lex_buff[1].tkv...
- 5 -
lex_buff_size = 2; USE_BUFFER = TRUE;
}
break;
}
/* Text outside any tag while no paragraph is open: the queue is returned as
 * _OpenTag, _C_KEYWORD, _EndTag, _IDENTIFIER and opened_par is set
 * (presumably an implicit <P> - the strcpy arguments are truncated). */
case _IDENTIFIER: {
if ( !(in_open || in_close) && (!opened_par)) {
lex_buff[0].tktyp = _IDENTIFIER; strcpy(lex_buff[0].tkv...
lex_buff[1].tktyp = _EndTag ; strcpy(lex_buff[1].tkv...
lex_buff[2].tktyp = _C_KEYWORD ; strcpy(lex_buff[2].tkv...
lex_buff[3].tktyp = _OpenTag ; strcpy(lex_buff[3].tkv...
lex_buff_size = 4; USE_BUFFER = TRUE;
opened_par = TRUE;
}
break;
}
/* nexttok() returned 0: the queue is returned as
 * _CloseTag, _C_KEYWORD, _EndTag, and finally the 0 token itself. */
case 0: { // EOF
if (opened_par) {
lex_buff[0].tktyp = tktyp ; strcpy(lex_buff[0].tkv...
lex_buff[1].tktyp = _EndTag ; strcpy(lex_buff[1].tkv...
lex_buff[2].tktyp = _C_KEYWORD ; strcpy(lex_buff[2].tkv...
lex_buff[3].tktyp = _CloseTagа ; strcpy(lex_buff[3].tkv...
lex_buff_size = 4; USE_BUFFER = TRUE;
/* NOTE(review): the paragraph was just closed, yet the flag is set to
 * TRUE - FALSE looks intended; confirm. */
opened_par = TRUE;
}
}
}
}
/* Pop the most recently stored token from the queue. */
if (USE_BUFFER) {
tktyp = lex_buff[--lex_buff_size].tktyp;
strcpy(tkval, lex_buff[а lex_buff_size].tkval);
if (lex_buff_size == 0) USE_BUFFER = FALSE;
}
/* NOTE(review): the malloc() result is used without a NULL check. */
yylval.pchar = (char *) malloc(1 + strlen(tkval));
strcpy(yylval.pchar, tkval);
return(tktyp);
}
Примечание: распечатки файлов htm_cnst.h, htm_glob.h, htm_tokn.h и
htm_err.h см. в приложении А (appendix 4.4).
- 6 -
Лабораторная работа 2
Задание:
Построить форматизатор С-файлов. Необходимо реализовать
следующие функции:
- `{` - всегда c новой строки без отступа
- `{` - никогда не переносится
- `{` - всегда c новой строки с отступом
- 0 - вставлять символ TAB при отступе
- 1..8 - вставлять x пробелов
- форматировать комментарии c xx по yy позиции
- несколько команд на одной строке
- `=` выделять пробелами
Текст программы:
#include <stdio.h>
#include <dos.h>
#include <ctype.h>
#include <string.h>
/* Lexeme classes returned by getNext(). */
#define ERROR    0
#define IDENT    1
#define KEYWORD  2
#define BRACKETS 3
#define OTHER    4
/* Single-character lexemes that format() switches on. */
#define BEGIN    '{'
#define END      '}'
#define COMMA    ','
#define SEMI     ';'
#define LB       '\n'

char val[100];      /* text of the current lexeme (filled by getNext)      */
char comment[200];  /* pending comment text; empty string when none        */
int  pos=0;         /* output column, advanced by outVal()/outCR()         */
int  undo;          /* lexeme stored by format() after a look-ahead read   */
int  backspace;     /* set by outCR(), consumed and cleared by outVal()    */

/* Words reported by getNext() as KEYWORD. */
#define N_KW 8
char keywords[N_KW][20]={
    "for",
    "while",
    "do",
    "if",
    "switch",
    "else",
    "case",
    "default"};
// -----------------------------------------------------------------
/*
 * blockQuote - copy one quoted literal from f to the buffer at s.
 *
 * The first character read from f is taken as the opening delimiter
 * (' or "); characters are copied up to and including the matching closing
 * delimiter.  A backslash copies the following character verbatim, so an
 * escaped quote does not end the literal.  Copying also stops at end of
 * file.
 *
 * s  in/out: write position; on return it points just past the last
 *    character stored.  No terminating NUL is written and the buffer size
 *    is not known here - the caller must provide enough room.
 */
void blockQuote (char *(&s),FILE *f)
{
    int quote=fgetc(f);             /* opening delimiter */
    int c;

    if (quote==EOF)
        return;
    *(s++)=(char)quote;

    while ((c=fgetc(f))!=EOF) {
        *(s++)=(char)c;
        if (c=='\\') {
            /* escape sequence: take the next character unexamined */
            c=fgetc(f);
            if (c==EOF)
                break;
            *(s++)=(char)c;
            continue;
        }
        if (c==quote)               /* only the SAME quote kind closes */
            break;
    }
}
// -----------------------------------------------------------------
/*
 * getNext - read the next lexeme from f into the global buffer val.
 *
 * Returns
 *   KEYWORD  val holds one of keywords[] followed by a single space;
 *   IDENT    val holds an identifier/number (letters, digits, '_', '.')
 *            or a complete quoted literal;
 *   OTHER    val holds one other character, or " = " when the macro E is
 *            non-zero and the previous lexeme was not punctuation
 *            (E is defined elsewhere; presumably the "surround '=' with
 *            spaces" option - TODO confirm);
 *   EOF      at end of input, with val empty.
 *
 * NOTE(review): punctuation such as '{', '}', ';' and '\n' is reported as
 * OTHER with the character in val, while format() switches on BEGIN / END /
 * SEMI / LB - check this against the complete program.
 */
int getNext (FILE *f)
{
    static int symbol=0;            /* 1 after a punctuation lexeme */
    char *s=val;
    /* keep room for the space appended to keywords and for the NUL */
    char *const limit=val+sizeof(val)-2;
    int c=fgetc(f);                 /* int, so EOF stays distinguishable */

    if (c==EOF) {
        *s=0;
        return EOF;
    }

    if (isalnum(c) || c=='_') {
        int i;

        symbol=0;
        *(s++)=(char)c;
        while ((c=fgetc(f))!=EOF) {
            if (!isalnum(c) && c!='_' && c!='.') {
                ungetc (c,f);       /* first char of the next lexeme */
                break;
            }
            if (s<limit)            /* over-long words are truncated */
                *(s++)=(char)c;
        }
        *s=0;

        for (i=0;i<N_KW && strcmp(keywords[i],val);i++);
        if (i==N_KW)
            return IDENT;
        *(s++)=' ';
        *s=0;
        return KEYWORD;
    }

    if (c=='\'' || c=='\"') {
        symbol=0;
        ungetc(c,f);                /* blockQuote re-reads the delimiter */
        blockQuote (s,f);
        *s=0;
        return IDENT;
    }

    if (c=='=' && E && !symbol)
    {
        *(s++)=' ';
        *(s++)=(char)c;
        *(s++)=' ';
        *s=0;
        return OTHER;
    }

    *s=(char)c, *(s+1)=0;
    symbol=1;
    return OTHER;
}
/*
 * outVal - write the current lexeme (global val) to f.
 *
 * If an indent step is pending (global backspace set) and back is zero, one
 * indent unit is written first: a TAB when the macro T is zero, otherwise T
 * spaces.  The pending flag is cleared in every case.  The global column
 * counter pos is advanced for everything written (by F3 for a TAB).
 */
void outVal (FILE *f,int back=0)
{
    if (backspace && !back) {
        if (T) {
            for (int n=0;n<T;++n) {
                fputc (' ',f);
                ++pos;
            }
        } else {
            fputc (9,f);            /* 9 is the TAB character code */
            pos+=F3;
        }
    }
    backspace=0;

    for (char *p=val;*p;++p) {
        fputc (*p,f);
        ++pos;
    }
}
void outCR (FILE *f,int tab,int newLine=0)
{
int limit=tab-1+newLine;
// Out Comments
if (*comment)
outComment (f);
if (!newLine) {
fputc ('\n',f);
pos=0;
- 8 -
}
for (int i=0;i<limit;i++)
{
if (!T) {
а fputc (9,f);
а pos+=F3;
}
else {
а for (int j=0;j<T;j++)а {
fputc (' ',f);
pos++;
а }
}
}
if (tab && !newLine)
backspace=1;
}
// -----------------------------------------------------------------
int format (FILE *f_in,FILE *f_out)
{
int tab=0,tab1=0,tab2=0,lb=0;
int lex;
while (!feof(f_in))
{
lex=getNext(f_in);
switch (lex) {
а case KEYWORD: {
lb=0;
outVal (f_out);
lex=getNext(f_in);
break;
а }
а case BEGIN: {
tab1=0;
if (!lb && N!=2) outCR (f_out,tab);
if (N==3) outCR (f_out,1,1);
outVal(f_out);
outCR (f_out,++tab);
lb=1;
break;
а }
а case END: {
if (!lb) outCR (f_out,tab);
lb=1;
tab1=0;
lex=getNext (f_in);
if (lex==LB) lex=getNext(f_in);
undo=lex;
outCR (f_out,tab);
lb=1;
break;
а }
а case SEMI: {
lb=0;
tab1=0;
outVal (f_out);
lex=getNext(f_in);
undo=lex;
break;
а }
а case IDENT: {
lb=0;
outVal (f_out);
lex=getNext (f_in);
undo=lex;
break;
а }
а case LB: {
- 9 -
int used;
tab1=0;
if (*comment) {
outCR (f_out,tab);
used=1;
}
lex=getNext (f_in);
if (N!=2 || lex!=BEGIN) {
if (!used) outCR (f_out,tab);
lb=1;
}
undo=lex;
break;
а }
а case EOF:
return 1;
а default: {
lb=0;
outVal(f_out);
а }
}
}
return 1;
}
Примечание: Необходимые функции были реализованы в полном объеме
и при сдаче нареканий не вызвали. (Тестовый пример
приведен в приложении, appendix 4.5)
- 10 -
Лабораторная работа 3
Задание:
Проанализировать хф и выбрать лучшую для случайного распределения ид-ов
Обобщенная формула вычисления хф:
h(0) = 0;
h(i) = Alf * h(i-1) [+] C(i), i=1..k
k - длина строки. [+] - некоторая произвольная операция
(+, -, <<, _rotl, ^, |, &)

Проанализировать эти и любые другие хф на количество коллизий (конфликтов)
для некоторых случайных последовательностей. Например:
1) id'ы языка C (до 50).
2) ---- "" ---- (до 100).
3) ---- "" ---- (до 1).
4) Внешние имена стандартной библиотеки BC++.
5) Внешние имена графической библиотеки BC++.
6) Случайно генерируемые имена (~600)
7) Английские слова с префиксами и/или суффиксами () - около 200
8) 300 имен вида : w, w001, w002, etc
Анализ статистики свести в таблицу/график
Текст программы анализатора:
#include <stdio.h>
#include <stdlib.h>
/*
 * hash_shift - shift-and-add hash: h = (h << 1) + c for every character c
 * of the NUL-terminated string s, starting from 0.
 */
unsigned hash_shift(char *s)
{
    unsigned acc;

    for (acc = 0; *s != '\0'; ++s)
        acc = (acc << 1) + *s;
    return acc;
}
/*
 * hash_rotl - rotate-and-xor hash: h = rotl(h, 1) ^ c for every character c
 * of the NUL-terminated string s, starting from 0.
 *
 * The one-bit left rotation is written out in portable C instead of calling
 * the compiler-specific _rotl(); it rotates over the full width of unsigned,
 * whatever that width is.
 */
unsigned hash_rotl(char *s)
{
    const unsigned below_top = ~0u >> 1;    /* largest value with top bit clear */
    unsigned hash = 0;

    while (*s) {
        unsigned carry = hash > below_top;  /* top bit wraps round to bit 0 */
        hash = ((hash << 1) | carry) ^ *s++;
    }
    return hash;
}
/*
 * hash_pgw - P. J. Weinberger's (PJW / ELF-style) string hash.
 *
 * For every character: h = (h << 4) + c; if any of the top four bits of the
 * 32-bit value (mask 0xF0000000) are set, they are folded back into bits
 * 4..7 and then cleared.  The result therefore always lies in
 * 0 .. 0x0FFFFFFF and is never negative when returned as long.
 *
 * The mask must select the TOP nibble; with the low-nibble mask 0xF the
 * fold never takes place and the low four bits are simply wiped.
 * Characters are taken as unsigned char so bytes above 0x7F do not
 * sign-extend.
 */
long hash_pgw(char *s)
{
    unsigned long h = 0;
    char *p;

    for (p = s; *p; p++)
    {
        unsigned long g;

        /* keep the arithmetic to 32 bits even where long is wider */
        h = ((h << 4) + (unsigned char)*p) & 0xFFFFFFFFUL;
        g = h & 0xF0000000UL;
        if (g != 0)
        {
            h ^= g >> 24;
            h ^= g;             /* clear the top nibble */
        }
    }
    return (long)h;
}
/*
 * hash_4 - multiplicative hash: h = D*h + c for every character c of the
 * NUL-terminated string s, starting from 0, with the multiplier D = 5.
 */
unsigned hash_4 (char *s)
{
    const unsigned D = 5;       /* explicit type: implicit int is not valid C99 */
    unsigned h = 0;

    while (*s)
        h = D*h + *s++;
    return h;
}
/*
 * Hash-function comparison tool.
 *
 * Usage: HASH.COM <n> <file_name>
 *   n          number of slots in each hash table (at least 10)
 *   file_name  text file with white-space separated identifiers
 *
 * Every identifier is hashed with hash_shift, hash_rotl, hash_pgw and
 * hash_4 into four tables of n counters.  For each function the program
 * prints the average number of identifiers per occupied slot and the
 * largest slot count.  Always returns 0.
 */
int main (int argc,char **argv)
{
    int n=0,total=0;
    int placed1=0,placed2=0,placed3=0,placed4=0;
    int max1=0,max2=0,max3=0,max4=0;
    int *table1=NULL,*table2=NULL,*table3=NULL,*table4=NULL;
    FILE *f=NULL;
    char buf[100];

    puts ("Л ¡оà âоàн ï à ¡оâ  3. ˆááл¥до¢ н¨¥ åíè-äãнªæ¨¨.");
    puts ("Лï¯ãно¢ ˆ.В. И-61");
    if (argc<3)
    {
        puts ("\nа HASH.COM <n> <file_name>");
        puts (" n - ç¨áло íл¥м¥нâо¢ ¢ â ¡л¨æ¥");
        puts (" file_name - ¨мï ä йл  á ¨д¥нâ¨ä¨ª âоà м¨ \n");
        return 0;
    }
    n=atoi(argv[1]);
    if (n<10)
    {
        puts ("\n Сл¨èªом м л¥нìª ï â ¡л¨æ \n");
        return 0;
    }

    table1=(int*)calloc(n,sizeof(int));
    table2=(int*)calloc(n,sizeof(int));
    table3=(int*)calloc(n,sizeof(int));
    table4=(int*)calloc(n,sizeof(int));
    if (!table1 || !table2 || !table3 || !table4)
    {
        puts ("\n¥ å¢ â ¥â ¯ мï⨠длï â ¡л¨æ\n");
        goto done;
    }
    if ((f=fopen(argv[2],"r"))==NULL)
    {
        puts ("\n¥ мо£ã оâªàëâì 㪠з ннëй ä йл\n");
        goto done;
    }

    /* "%99s" keeps a long word from overrunning buf; looping on the fscanf
     * result also ends the loop on a read error, not only at end of file. */
    while (fscanf (f,"%99s",buf)==1)
    {
        table1[hash_shift(buf)%n]++;
        table2[hash_rotl(buf)%n]++;
        /* hash_pgw returns a signed long: reduce it as unsigned so the
         * index can never be negative */
        table3[(unsigned long)hash_pgw(buf)%n]++;
        table4[hash_4(buf)%n]++;
        puts (buf);
        total++;
    }

    puts ("Сâ â¨á⨪ :");
    for (int i=0;i<n;i++)
    {
        if (table1[i]) {
            if (max1<table1[i]) max1=table1[i];
            placed1++;
        }
        if (table2[i]) {
            if (max2<table2[i]) max2=table2[i];
            placed2++;
        }
        if (table3[i]) {
            if (max3<table3[i]) max3=table3[i];
            placed3++;
        }
        if (table4[i]) {
            if (max4<table4[i]) max4=table4[i];
            placed4++;
        }
    }
    /* with an empty input file no slot is occupied: report 0, not 0/0 */
    printf (" а HASH_SHIFT : %fа max=%d\n",placed1 ? (float)total/placed1 : 0.0f,max1);
    printf (" а HASH_ROTLа : %fа max=%d\n",placed2 ? (float)total/placed2 : 0.0f,max2);
    printf (" а HASH_PGW : %fа max=%d\n",placed3 ? (float)total/placed3 : 0.0f,max3);
    printf (" а HASH_4 : %fа max=%d\n",placed4 ? (float)total/placed4 : 0.0f,max4);

done:
    if (f)
        fclose (f);
    free (table1);
    free (table2);
    free (table3);
    free (table4);
    return 0;
}