Скачать работу в формате MS Word.

Препроцессор языка СИ

Лабораторная работа 1


Задание:


Построить лексический анализатор (сканер), который будет в

дальнейшем использоваться при разборе HTML-документов.


Текст программы:


#include <io.h>

#include <ctype.h>

#include <string.h>

#include <alloc.h>


#include "parser\htm_cnst.h"

#include "parser\htm_glob.h"

#include "parser\htm_tokn.h"

#include "parser\htm_err.h"

#include "parse.h"

#pragma hdrstop


extern YYSTYPE yylval;

/*
 * Lexer states, in order: text, preformatted text, HTML KEYWORD,
 * attribute KEYWORD, attribute value KEYWORD, inside a quoted value.
 */
enum {
    TEXT = 0,
    PRE,
    KEYWORD,
    ATTR,
    AVALUE,
    IN_QUOTE
};

/* Printable state names, listed in the same order as the enum above. */
char *states[6] = {
    "TEXT",
    "PRE",
    "KEYWORD",
    "ATTR",
    "AVALUE",
    "IN_QUOTE"
};

/* Printable names of the token kinds. */
char *tktypes[10] = {
    "_OpenTag",
    "_CloseTag",
    "_EndTag",
    "_C_KEYWORD",
    "_S_KEYWORD",
    "_A_KEYWORD",
    "_V_KEYWORD",
    "_NUM",
    "_IDENTIFIER",
    "_QUOTED_ATTR"
};

/* Tag context: nexttok() sets these when it returns "</" or "<". */
int in_close = FALSE;
int in_open = FALSE;
/* Maintained by yylex(); set from comparisons of the tag name with "P". */
int opened_par = FALSE;
/* Current lexer state and the state saved when a '<' is met. */
int state = TEXT;
int old_state = TEXT;
int cUKSZ = 0;

/*
 * Token push-back buffer used by yylex(): while USE_BUFFER is set,
 * tokens are taken from lex_buff[] starting at the highest used index.
 */
int USE_BUFFER = FALSE;
int lex_buff_size = 0;
typedef struct {
    int tktyp;          /* token type */
    char tkval[NMSZ];   /* token text */
} tbuff;
tbuff lex_buff[5];

int c;          /* one-character lookahead, always one char ahead of the token */
int lineno;
long charno;    /* incremented by nexttok() after every nextchar() call */
long f_size;
int icm;
/* !!!!!! */
char comment[CMSZ];
/* !!!!!! */

/* !!!!!! */


/* One entry of a keyword lookup table searched by bsearch(). */
typedef struct {
    char name[TKSZ];    /* keyword text, the key compared with strcmp() */
    int kw_token;       /* token type returned for this keyword */
    int in_paragraph;   /* presumably: tag may appear inside a paragraph - TODO confirm */
} kw_table;


/********** functions declarations ************/
void fixfile(FILE *, char*);
int nextchar(FILE *, FILE *);
void nlproc (FILE *);
int bsearch (char *, kw_table *, int);
int nexttok (char *);
void lexinit();
int yylex();
/**********************************************/


/* Possible KEYWORDS - directives */

kw_table keyword_table[KWSZ] = {



- 2 -


{"A", _C_KEYWORD, TRUEа },

//...

{"WBR", _S_KEYWORD, TRUEа }

};


/*
 * Possible KEYWORDS - attributes.
 * nexttok() searches this table with bsearch() (a binary search), so the
 * entries must be kept sorted by name in strcmp() order.
 * Only name and kw_token are given; in_paragraph is left to its implicit
 * zero initializer.
 */

kw_table attr_table[ATSZ] = {

{"ALIGN", _A_KEYWORD },

//...

{"WRAP", _A_KEYWORD }

};


/*
 * Possible KEYWORDS - attribute's values.
 * Not referenced by the code visible in this listing.
 * Only name and kw_token are given; in_paragraph is left to its implicit
 * zero initializer.
 * NOTE(review): the last entry is lower-case ("_top") while nexttok()
 * upper-cases tag/attribute names before lookup - confirm how values are
 * matched against this table.
 */

kw_table aval_table[AVSZ] = {

{"ABSBOTTOM", _V_KEYWORD },

//...

{"_top", _V_KEYWORD }

};


/*
 * Functions
 */


/*
 * Binary search for a name in a table of names.
 *
 * word       - NUL-terminated name to look for
 * word_table - array of records made of a NUL-terminated string constant
 *              (an HTML keyword) and the int type of that keyword; the
 *              search assumes the records are ordered by strcmp() on name
 * tbsize     - number of records to search
 *
 * Returns the index of the matching element in the array, or -1 if
 * nothing was found.
 */
int bsearch( char word[], kw_table word_table[], int tbsize)
{
    int lo = 0;
    int hi = tbsize - 1;

    while (lo <= hi) {
        int mid = (lo + hi) / 2;
        int cmp = strcmp(word, word_table[mid].name);

        if (cmp == 0)
            return mid;

        if (cmp < 0)
            hi = mid - 1;   /* continue in the lower half */
        else
            lo = mid + 1;   /* continue in the upper half */
    }

    return -1;
}


/*
 * Read the next raw token from the input.
 *
 * val - output buffer that receives the token text (NUL-terminated);
 *       at most NMSZ bytes including the terminator are written.
 *
 * Returns the token type:
 *   _OpenTag / _CloseTag  for "<" / "</" met in the TEXT or PRE state,
 *   a keyword_table or attr_table token type for a known name,
 *   _IDENTIFIER           for an unknown name, a skipped "<!...>" construct
 *                         (empty text) or a white-space character in PRE,
 *   '"' / _QUOTED_ATTR    in the IN_QUOTE state,
 *   0                     when no branch applies to the current state.
 *
 * The global `c` always holds one character of lookahead; `state`,
 * `old_state`, `in_open` and `in_close` are updated as a side effect.
 * Names and quoted values longer than NMSZ-1 characters are truncated
 * (the excess input is still consumed).
 */
int nexttok(char *val)
{
    char tokenvalue[NMSZ];
    int i = 0;
    int c1, tokentype;

    /* Skip white space; c is always one char ahead. */
    while (isspace(c)) {
        if (c == '\n') nlproc(listfp);
        if (state == PRE) {
            /* In the PRE state each white-space character is a token. */
            tokenvalue[0] = c; tokenvalue[1] = '\0';
            c = nextchar(infp, listfp); charno++;
            strcpy(val, tokenvalue);
            return _IDENTIFIER;
        }
        c = nextchar(infp, listfp); charno++;
    }

    /* Start of a tag: peek at the following character to tell "</" from "<". */
    if (((state == TEXT) || (state == PRE)) && (c == '<')) {
        ungetc(c1 = getc(infp), infp);
        old_state = state; state = KEYWORD;
        if (c1 == '/') {
            c = nextchar(infp, listfp); charno++;
            c = nextchar(infp, listfp); charno++;
            in_close = TRUE; in_open = FALSE;
            strcpy(val, "</");
            return _CloseTag;
        }
        c = nextchar(infp, listfp); charno++;
        in_close = FALSE; in_open = TRUE;
        strcpy(val, "<");
        return _OpenTag;
    }

    if (state == KEYWORD) {
        if (c == '!') { /* Comments! */
            while ((c != '>') && (c != EOF)) {
                c = nextchar(infp, listfp); charno++;
            }
            state = ATTR;
            val[0] = '\0';
            return _IDENTIFIER;
        }

        /* Collect the name in upper case, never writing past tokenvalue. */
        while (isalnum(c)) {
            if (i < NMSZ - 1)
                tokenvalue[i++] = toupper(c);
            c = nextchar(infp, listfp); charno++;
        }
        tokenvalue[i] = '\0';

        if ((i = bsearch(tokenvalue, keyword_table, KWSZ)) >= 0) {
            tokentype = keyword_table[i].kw_token;
            state = ATTR;
            if (strcmp("PRE", tokenvalue) == 0) {
                /* <PRE> enters the preformatted state, </PRE> leaves it. */
                old_state = in_close ? TEXT : PRE;
            }
        } else if ((i = bsearch(tokenvalue, attr_table, ATSZ)) >= 0) {
            tokentype = attr_table[i].kw_token;
            state = AVALUE;
        } else {
            /* Unknown attribute. It is much easier to ignore it in YACC
             * than to try to skip it here. */
            tokentype = _IDENTIFIER;
            state = AVALUE;
        }
        strcpy(val, tokenvalue);
        return tokentype;
    }

    if (state == IN_QUOTE) {
        if (c == '"') {
            c = nextchar(infp, listfp); charno++;
            state = ATTR;
            strcpy(val, "\"");
            return '"';
        }

        /* maybe URL, maybe rain, maybe snow... */
        /* Stop at EOF as well, otherwise an unterminated quote loops forever. */
        while ((c != '"') && (c != EOF)) {
            if (i < NMSZ - 1)
                tokenvalue[i++] = c;
            c = nextchar(infp, listfp); charno++;
        }
        tokenvalue[i] = '\0';
        /* The closing quotation mark is left in c for the next call. */
        strcpy(val, tokenvalue);
        return _QUOTED_ATTR;
    } /* end if for (state == IN_QUOTE) */

    return 0; /* This cannot happen... */
}


/*
 * Token source for the parser: returns the next token type and stores a
 * malloc'ed copy of the token text in yylval.pchar.
 *
 * When the push-back buffer is empty a token is read with nexttok(); for
 * tag openers/closers, identifiers and token type 0 extra tokens may be
 * queued in lex_buff[]. The queue is drained from the highest index down
 * (see the `--lex_buff_size` pop below), so entries are stored in reverse
 * order of delivery.
 *
 * NOTE(review): many lines of this listing are cut off at the right margin
 * (they end in "..."), and a page-number line ("- 5 -") sits inside the
 * body; the function does not compile as printed.
 */
int yylex()

{

intа tktyp;

char tkval[NMSZ];


if (!USE_BUFFER) {

tktyp = nexttok(tkval);

switch (tktyp) {

case _OpenTag: {

/* Read the tag name that follows "<". */
tktyp = nexttok(tkval);

if (opened_par) {

if (tktyp == _C_KEYWORD || tktyp == _S_KEYWORD) {

/* NOTE(review): bsearch() returns -1 for an unknown name; the visible
   part of the next line uses its result as an index unchecked - confirm. */
аif (!keyword_table[bsearch(tkval, keyword_table, KW...

lex_buff[0].tktyp = tktyp ; strcpy(lex_buff[...

lex_buff[1].tktyp = _OpenTagа ; strcpy(lex_buff[...

lex_buff[2].tktyp = _EndTag ; strcpy(lex_buff[...

lex_buff[3].tktyp = _C_KEYWORD; strcpy(lex_buff[...

lex_buff[4].tktyp = _CloseTag ; strcpy(lex_buff[...

lex_buff_size = 5; USE_BUFFER = TRUE;

opened_par = (strcmp(tkval, "P")==0);

а} else {

lex_buff[0].tktyp = tktyp ; strcpy(lex_buff[...

lex_buff[1].tktyp = _OpenTagа ; strcpy(lex_buff[...

lex_buff_size = 2; USE_BUFFER = TRUE;

а}

}

} else { // i.e. opened_par == FALSE

opened_par = (strcmp(tkval, "P")==0);

lex_buff[0].tktyp = tktyp ; strcpy(lex_buff[0].tkv...

lex_buff[1].tktyp = _OpenTagа ; strcpy(lex_buff[1].tkv...

lex_buff_size = 2; USE_BUFFER = TRUE;

}

break;

}


case _CloseTag: {

/* Read the tag name that follows "</". */
tktyp = nexttok(tkval);

if (opened_par) {

/* NOTE(review): same unchecked bsearch() index as in the _OpenTag case. */
if (keyword_table[bsearch(tkval, keyword_table, KWSZ)]...

аlex_buff[0].tktyp = tktyp ; strcpy(lex_buff[0]....

аlex_buff[1].tktyp = _CloseTag ; strcpy(lex_buff[1]....

аlex_buff_size = 2; USE_BUFFER = TRUE;

} else {

аif (strcmp(tkval, "P")==0) {

lex_buff[0].tktyp = tktyp ; strcpy(lex_buff[...

lex_buff[1].tktyp = _CloseTag ; strcpy(lex_buff[...

lex_buff_size = 2; USE_BUFFER = TRUE;

opened_par = FALSE;

а} else {

lex_buff[0].tktyp = tktyp ; strcpy(lex_buff[...

lex_buff[1].tktyp = _CloseTag ; strcpy(lex_buff[...

lex_buff[2].tktyp = _EndTag ; strcpy(lex_buff[...

lex_buff[3].tktyp = _C_KEYWORD; strcpy(lex_buff[...

lex_buff[4].tktyp = _CloseTag ; strcpy(lex_buff[...

аlex_buff_size = 5; USE_BUFFER = TRUE;

opened_par = FALSE;

а}

}

} else {

lex_buff[0].tktyp = tktyp ; strcpy(lex_buff[0].tkv...

lex_buff[1].tktyp = _CloseTag ; strcpy(lex_buff[1].tkv...



- 5 -


lex_buff_size = 2; USE_BUFFER = TRUE;

}


break;

}


case _IDENTIFIER: {

/* An identifier outside any tag while no paragraph is open: queue four
   tokens and mark a paragraph as open. */
if ( !(in_open || in_close) && (!opened_par)) {

lex_buff[0].tktyp = _IDENTIFIER; strcpy(lex_buff[0].tkv...

lex_buff[1].tktyp = _EndTag ; strcpy(lex_buff[1].tkv...

lex_buff[2].tktyp = _C_KEYWORD ; strcpy(lex_buff[2].tkv...

lex_buff[3].tktyp = _OpenTag ; strcpy(lex_buff[3].tkv...

lex_buff_size = 4; USE_BUFFER = TRUE;

opened_par = TRUE;

}

break;

}


case 0: { // EOF

if (opened_par) {

lex_buff[0].tktyp = tktyp ; strcpy(lex_buff[0].tkv...

lex_buff[1].tktyp = _EndTag ; strcpy(lex_buff[1].tkv...

lex_buff[2].tktyp = _C_KEYWORD ; strcpy(lex_buff[2].tkv...

lex_buff[3].tktyp = _CloseTagа ; strcpy(lex_buff[3].tkv...

lex_buff_size = 4; USE_BUFFER = TRUE;

/* NOTE(review): the other branches that queue a _CloseTag sequence reset
   opened_par to FALSE; TRUE here looks like a slip - confirm. */
opened_par = TRUE;

}

}


}

}


/* Deliver the most recently indexed queued token; the buffer is switched
   off once it is empty. */
if (USE_BUFFER) {

tktyp = lex_buff[--lex_buff_size].tktyp;

strcpy(tkval, lex_buff[а lex_buff_size].tkval);

if (lex_buff_size == 0) USE_BUFFER = FALSE;

}


/* NOTE(review): the malloc() result is used without a NULL check, and
   nothing in this function frees the copy. */
yylval.pchar = (char *) malloc(1 + strlen(tkval));

strcpy(yylval.pchar, tkval);

return(tktyp);

}


Примечание: распечатки файлов htm_cnst.h, htm_glob.h, htm_tokn.h и

htm_err.h см. в приложении А apendix'а 4.4.
















- 6 -


Лабораторная работа 2


Задание:

аоáâàо¨âì äоàм â¨з âоà С-ä йло¢. ¥о¡åод¨мо ॠл¨зо¢ âì

áл¥дãî騥 äãнªæ¨¨:

-а `{` - ¢á¥£д  c но¢ой áâàоª¨ ¡¥з оâáâ㯠

-а `{` - н¨ªо£д  н¥ ¯¥à¥ноá¨âáï

-а `{` - ¢á¥£д  c но¢ой áâàоª¨ á оâáâã¯ом

-а 0 - ¢áâ ¢лïâì á¨м¢ол TAB ¯à¨ оâáâ㯥

-а 1..8а ¢áâ ¢лïâì x ¯àо¡¥ло¢

а-а äоàм â¨àо¢ âì ªомм¥нâ à¨¨ c xx ¯о yy ¯оз¨æ¨¨

-а н¥áªолìªо ªом нд н  одной áâàоª¥

-а `=` ¢ëд¥лïâì ¯àо¡¥л м¨


Текст программы:

#include <stdio.h>

#include <dos.h>

#include <ctype.h>

#include <string.h>


/* Token classes; getNext() returns IDENT, KEYWORD or OTHER. */
#define ERROR 0
#define IDENT 1
#define KEYWORD 2
#define BRACKETS 3
#define OTHER 4

/* Single-character codes; format() switches on BEGIN, END, SEMI and LB. */
#define BEGIN '{'
#define END '}'
#define COMMA ','
#define SEMI ';'
#define LB '\n'


char val[100];      /* text of the current token, filled by getNext() */
char comment[200];  /* pending comment text; outCR() flushes it when non-empty */

int pos=0;          /* output column, advanced by outVal()/outCR() */
int undo;           /* look-ahead token code stored by format() */
int backspace;      /* set by outCR(), consumed and cleared by outVal() */


/* Number of entries in keywords[]. */
#define N_KW 8

/* Words that getNext() reports as KEYWORD instead of IDENT. */
char keywords[N_KW][20] = {
    "for",    "while", "do",   "if",
    "switch", "else",  "case", "default"
};


// -----------------------------------------------------------------

/*
 * Copy one quoted literal ('...' or "...") from f to the buffer at s.
 *
 * s - write cursor, passed by reference; on return it points just past
 *     the last character stored (the caller adds the terminating NUL).
 * f - input stream positioned on the opening quote character.
 *
 * The literal ends at the first unescaped occurrence of the SAME quote
 * character that opened it, so "it's" and '"' are copied whole. A backslash
 * always takes the next character with it, so \" and \\ do not end the
 * literal. Copying also stops at end of file.
 * No bound is applied to the destination; the caller must supply a buffer
 * large enough for the literal.
 */
void blockQuote (char *(&s),FILE *f)
{
    int quote = fgetc(f);       /* opening delimiter */

    if (quote == EOF)
        return;
    *(s++) = (char)quote;

    for (;;) {
        int ch = fgetc(f);

        if (ch == EOF)
            break;
        *(s++) = (char)ch;

        if (ch == '\\') {
            /* Escape sequence: copy the escaped character unconditionally. */
            ch = fgetc(f);
            if (ch == EOF)
                break;
            *(s++) = (char)ch;
            continue;
        }
        if (ch == quote)
            break;
    }
}

// -----------------------------------------------------------------

/*
 * Read the next token from f into the global buffer val.
 *
 * Returns
 *   KEYWORD - val holds one of keywords[] followed by a single space;
 *   IDENT   - val holds an identifier/number (letters, digits, '_', and
 *             '.' after the first character) or a whole quoted literal;
 *   OTHER   - val holds " = " (when E is non-zero and the previous token
 *             was not a single "other" character) or the one character read.
 *
 * At end of file the function still returns OTHER, with the EOF value
 * truncated to char stored in val.
 * Identifiers longer than val can hold are truncated; the excess input is
 * consumed.
 * NOTE(review): E is defined outside this listing - presumably the
 * "surround '=' with spaces" option; confirm.
 */
int getNext (FILE *f)
{
    /* 1 when the previous token was a single "other" character. */
    static int symbol=0;
    char *s=val;
    /* Last position that still leaves room for the trailing ' ' and NUL. */
    char *const limit=val+sizeof(val)-2;
    int c=fgetc(f);     /* int, so EOF stays distinct from character data */
    int i;

    if (isalnum(c) || c=='_') {
        symbol=0;
        *(s++)=(char)c;
        for (;;) {
            c=fgetc(f);
            /* White space and EOF fail this test too and end the token. */
            if (!(isalnum(c) || c=='_' || c=='.'))
                break;
            if (s<limit)
                *(s++)=(char)c;
        }
        if (c!=EOF)
            ungetc (c,f);
        *s=0;

        for (i=0;i<N_KW && strcmp(keywords[i],val);i++)
            ;
        if (i!=N_KW) {
            *(s++)=' ';
            *s=0;
            return KEYWORD;
        }
        return IDENT;
    }

    if (c=='\'' || c=='\"') {
        symbol=0;
        ungetc(c,f);    /* blockQuote() expects to read the opening quote */
        blockQuote (s,f);
        *s=0;
        return IDENT;
    }

    if (c=='=' && E && !symbol) {
        *(s++)=' ';
        *(s++)=(char)c;
        *(s++)=' ';
        *s=0;
        return OTHER;
    }

    *s=(char)c, *(s+1)=0;
    symbol=1;
    return OTHER;
}


oid outVal (FILE *f,int back=0)

{

char *s=val;


if (backspace && !back)

if (!T) {

fputc (9,f);

pos+=F3;

}

else {

for (int j=0;j<T;j++)

{

аfputc (' ',f);

аpos++;

}

}

backspace=0;

while (*s) {

fputc (*(s++),f);

pos++;

}

}


oid outCR (FILE *f,int tab,int newLine=0)

{

int limit=tab-1+newLine;


// Out Comments

if (*comment)

outComment (f);

if (!newLine) {

fputc ('\n',f);

pos=0;



- 8 -


}

for (int i=0;i<limit;i++)

{

if (!T) {

аfputc (9,f);

аpos+=F3;

}

else {

аfor (int j=0;j<T;j++)а {

fputc (' ',f);

pos++;

а}

}

}

if (tab && !newLine)

backspace=1;

}

// -----------------------------------------------------------------


int format (FILE *f_in,FILE *f_out)

{

int tab=0,tab1=0,tab2=0,lb=0;

int lex;


while (!feof(f_in))

{

lex=getNext(f_in);

switch (lex) {

аcase KEYWORD: {

lb=0;

outVal (f_out);

lex=getNext(f_in);

break;

а}

аcase BEGIN: {

tab1=0;

if (!lb && N!=2) outCR (f_out,tab);

if (N==3) outCR (f_out,1,1);

outVal(f_out);

outCR (f_out,++tab);

lb=1;

break;

а}

аcase END: {

if (!lb) outCR (f_out,tab);

lb=1;

tab1=0;

lex=getNext (f_in);

if (lex==LB) lex=getNext(f_in);

undo=lex;

outCR (f_out,tab);

lb=1;

break;

а}

аcase SEMI: {

lb=0;

tab1=0;

outVal (f_out);

lex=getNext(f_in);

undo=lex;

break;

а}

аcase IDENT: {

lb=0;

outVal (f_out);

lex=getNext (f_in);

undo=lex;

break;

а}

аcase LB: {



- 9 -


int used;


tab1=0;

if (*comment) {

outCR (f_out,tab);

used=1;

}

lex=getNext (f_in);

if (N!=2 || lex!=BEGIN) {

if (!used) outCR (f_out,tab);

lb=1;

}

undo=lex;

break;

а}

аcase EOF:

return 1;

аdefault: {

lb=0;

outVal(f_out);

а}

}

}


return 1;

}


Примечание: Необходимые функции были реализованы в полном объеме

и при сдаче нареканий не вызвали. (Тестовый пример

приведен в приложении А apendix'а 4.5)























- 10 -


Лабораторная работа 3


Задание:


Проанализировать хф и выбрать лучшую для случайного распределения ид-ов


Обобщенная формула вычисления хф:


h0 = 0;

h(i) = Alf* h(i-1) [+] C(i), i=1..k

k - длина строки. [+] - некоторая произвольная операция

(+, -, <<, _rotl, ^, |, &)

Проанализировать эти и любые другие хф на количество коллизий (конфликтов)

для некоторых случайных последовательностей. Например:

1) id'ы языка C (до 50).

2) ---- "" ---- (до 100).

3) ---- "" ---- (до 1).

4) Внешние имена стандартной библиотеки BC++.

5) Внешние имена графической библиотеки BC++.

6) Случайно генерируемые имена (~600)

7) Английские слова с префиксами и/или суффиксами () - около 200

8) 300 имен вида: w, w001, w002, etc

Анализ статистики свести в таблицу/график


Текст программы анализатора:

#include <stdio.h>

#include <stdlib.h>


/*
 * Shift-and-add hash: for every character, h = (h << 1) + character.
 *
 * s - NUL-terminated string to hash
 *
 * Returns the hash value; 0 for the empty string.
 */
unsigned hash_shift(char *s)
{
    unsigned acc;

    for (acc = 0; *s != '\0'; s++)
        acc = (acc << 1) + *s;

    return acc;
}


/*
 * Rotate-and-xor hash: for every character, h = rotl(h, 1) ^ character.
 *
 * s - NUL-terminated string to hash
 *
 * Returns the hash value; 0 for the empty string.
 *
 * The one-bit left rotation is written out in portable C instead of the
 * compiler-specific _rotl() intrinsic, so the function builds with any C
 * compiler and works for any width of unsigned int.
 */
unsigned hash_rotl(char *s)
{
    const unsigned top_bit = ~(~0u >> 1);   /* most significant bit only */
    unsigned hash = 0;

    while (*s) {
        /* Rotate left by one: the bit shifted out re-enters at bit 0. */
        hash = (hash << 1) | ((hash & top_bit) != 0);
        hash ^= *s++;
    }
    return hash;
}


/*
 * PJW (P. J. Weinberger) string hash.
 *
 * For every character: h = (h << 4) + character; when any of bits 28..31
 * become set they are folded back into bits 4..7 and then cleared.
 *
 * s - NUL-terminated string to hash
 *
 * Returns the hash value; it always fits in 28 bits, so it is never
 * negative and can be reduced with % directly.
 *
 * The overflow mask is 0xF0000000: with the previous mask of 0xF the
 * `g >> 24` term was always zero, the fold only cleared the low four bits,
 * and the result could exceed LONG_MAX and come out negative.
 */
long hash_pgw(char *s)
{
    unsigned long h = 0;
    unsigned long g;
    char *p;

    for (p = s; *p; p++) {
        /* unsigned char: bytes above 0x7F must not be sign-extended. */
        h = (h << 4) + (unsigned char)*p;
        g = h & 0xF0000000UL;
        if (g != 0) {
            h ^= g >> 24;
            h ^= g;
        }
    }
    return (long)h;
}


/*
 * Multiplicative hash: for every character, h = D*h + character, with D = 5.
 *
 * s - NUL-terminated string to hash
 *
 * Returns the hash value; 0 for the empty string.
 */
unsigned hash_4 (char *s)
{
    const unsigned D = 5;   /* multiplier; the type is now spelled out */
    unsigned h = 0;

    while (*s)
        h = D*h + *s++;
    return h;
}


/*
 * Hash-function comparison driver.
 *
 * Usage: HASH.COM <n> <file_name>
 *   n         - number of table slots (at least 10)
 *   file_name - text file with white-space separated identifiers
 *
 * Every identifier is echoed and counted in four tables of n slots, one
 * per hash function. For each function the program then prints
 * total / (number of non-empty slots) and the largest slot count.
 *
 * Returns 0 in all cases, including usage and resource errors.
 *
 * The message strings are restored from their mis-encoded form.
 */
int main (int argc,char **argv)
{
    int n=0,total=0;
    int placed1=0,placed2=0,placed3=0,placed4=0;
    int max1=0,max2=0,max3=0,max4=0;
    int *table1,*table2,*table3,*table4;
    int i;
    FILE *f;
    char buf[100];

    puts ("Лабораторная работа 3. Исследование хэш-функции.");
    puts ("Ляпунов И.В. А-61");

    if (argc<3) {
        puts ("\n  HASH.COM <n> <file_name>");
        puts (" n - число элементов в таблице");
        puts (" file_name - имя файла с идентификаторами \n");
        return 0;
    }

    n=atoi(argv[1]);
    if (n<10) {
        puts ("\n Слишком маленькая таблица \n");
        return 0;
    }

    table1=(int*)calloc(n,sizeof(int));
    table2=(int*)calloc(n,sizeof(int));
    table3=(int*)calloc(n,sizeof(int));
    table4=(int*)calloc(n,sizeof(int));
    if (!table1 || !table2 || !table3 || !table4) {
        puts ("\nНе хватает памяти для таблиц\n");
        return 0;
    }

    if ((f=fopen(argv[2],"r"))==NULL) {
        puts ("\nНе могу открыть указанный файл\n");
        return 0;
    }

    /* %99s keeps a long word from overrunning buf[100]. */
    while (fscanf (f,"%99s",buf)==1) {
        table1[hash_shift(buf)%n]++;
        table2[hash_rotl(buf)%n]++;
        /* Reduce as unsigned so the index can never be negative. */
        table3[(unsigned long)hash_pgw(buf)%n]++;
        table4[hash_4(buf)%n]++;
        puts (buf);
        total++;
    }
    fclose (f);

    puts ("Статистика :");
    for (i=0;i<n;i++) {
        if (table1[i]) {
            if (max1<table1[i]) max1=table1[i];
            placed1++;
        }
        if (table2[i]) {
            if (max2<table2[i]) max2=table2[i];
            placed2++;
        }
        if (table3[i]) {
            if (max3<table3[i]) max3=table3[i];
            placed3++;
        }
        if (table4[i]) {
            if (max4<table4[i]) max4=table4[i];
            placed4++;
        }
    }

    /* With an empty input file no slot is used; print 0 instead of 0/0. */
    printf (" HASH_SHIFT : %f  max=%d\n",placed1 ? (float)total/placed1 : 0.0f,max1);
    printf (" HASH_ROTL  : %f  max=%d\n",placed2 ? (float)total/placed2 : 0.0f,max2);
    printf (" HASH_PGW : %f  max=%d\n",placed3 ? (float)total/placed3 : 0.0f,max3);
    printf (" HASH_4 : %f  max=%d\n",placed4 ? (float)total/placed4 : 0.0f,max4);

    free (table1);
    free (table2);
    free (table3);
    free (table4);

    return 0;
}