Matrixbob
23-04-2007, 17:51
Ho trovato la "funzione/programma" che leggerete a seguire.
Ma già nel "main" c'è qualcosa che non mi piace ed è questo:
// Excerpt of the read/convert/write loop from main().
// Allocate one working buffer of BUFFER_LEN bytes.
RBuffer = (char*) malloc( BUFFER_LEN );
// fgets() stores at most BUFFER_LEN-1 characters per call and stops after a
// newline; a longer line is returned by several consecutive calls, so no
// input is dropped when a line exceeds the buffer.
while( fgets( RBuffer, BUFFER_LEN, InFile ) != NULL ){
dfConvert( RBuffer ); // rewrites the chunk in place
fputs( RBuffer, OutFile );
}// endwhile
free( RBuffer );
fgets mi pare sia 1 funzione che legge al MAX quel tot_caratteri dopo di che si ferma.
Se il file è + lungo allora son cavoli e bisogna modificare la MACRO.
Giusto?!
/*
* HTML2TXT
*
* Copyright 2000 Matteo Baccan <mbaccan@planetisa.com>
* www - http://www.infomedia.it/artic/Baccan
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA (or visit
* their web site at http://www.gnu.org/).
*
*/
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <malloc.h>
#include <io.h>
#define BUFFER_LEN 4096 // buffer length (bytes handed to fgets per call)
#define CV_EXITPARA 1 // exit status: parameter error
#define CV_EXITOPEN 2 // exit status: open error
#define CV_EXITEXIST 3 // exit status: output file already exists
void dfPrintLogo(void); // logo
void dfPrintInfo(void); // parameter info
void dfPrintOpenError( const char * InFile ); // open error
void dfPrintExist( const char * OutFile ); // file Exist
void dfConvert( char * Buffer ); // convert string in place
// Pattern matcher used by dfConvert: iLen is the caller's remaining-length
// counter, iPointer is one past the index where matching starts, Check is
// the pattern and iCheckLen its declared length.
int dfCheckChar( int iLen,
char *Buffer,
int iPointer,
char *Check,
int iCheckLen );
int main(int argc, char *argv[]) {
FILE *InFile, *OutFile; // file declaration
char *RBuffer; // Read Buffer
if( argc < 3 ){ // check parameter
dfPrintInfo();
exit( CV_EXITPARA );
}// endif
dfPrintLogo();
if( access( argv[2], 00 ) >= 0 ){ // file exist
dfPrintExist( argv[2] );
exit( CV_EXITEXIST );
}// endif
if( (InFile = fopen( argv[1], "r")) == NULL ){ // open file
dfPrintOpenError( argv[1] );
exit( CV_EXITOPEN );
}// endif
if( (OutFile = fopen( argv[2], "w")) == NULL ){ // create file
dfPrintOpenError( argv[2] );
exit( CV_EXITOPEN );
}// endif
printf("\n ž Reading %s\n", argv[1] ); // Convert
RBuffer = (char*) malloc( BUFFER_LEN );
while( fgets( RBuffer, BUFFER_LEN, InFile ) != NULL ){
dfConvert( RBuffer );
fputs( RBuffer, OutFile );
}// endwhile
free( RBuffer );
printf("\n ž OK \n" );
fclose( InFile ); // Close File
fclose( OutFile );
return 0;
}// end of main
/* Prints the usage banner (program name, version, argument help) to stdout. */
void dfPrintInfo(){
    /* One entry per output line, emitted in order. */
    static const char *const usageLines[] = {
        "\n",
        "ŚÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄæ\n",
        "³²±° HTML2TXT Converter HTML to TXT Version 2.00 °±²³\n",
        "³²±° Copyright 1997-2000 The Wonderful Team All Rights Reserved °±²³\n",
        "³²±° ÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄ °±²³\n",
        "³²±° Usage: HTML2TXT <InFile> <OutFile> °±²³\n",
        "³²±° °±²³\n",
        "³²±° InFile = File 2 convert into TXT °±²³\n",
        "³²±° °±²³\n",
        "³²±° OutFile = File 2 save °±²³\n",
        "ĄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄŁ\n"
    };
    size_t row;

    for( row = 0; row < sizeof usageLines / sizeof usageLines[0]; row++ ){
        fputs( usageLines[row], stdout );
    }
}// end of print info
/* Prints the program logo (name, version, copyright box) to stdout. */
void dfPrintLogo(){
    /* One entry per output line, emitted in order. */
    static const char *const logoLines[] = {
        "\n",
        "ŚÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄæ\n",
        "³²±° HTML2TXT Converter HTML to TXT Version 2.00 °±²³\n",
        "³²±° Copyright 1997-2000 The Wonderful Team All Rights Reserved °±²³\n",
        "ĄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄŁ\n"
    };
    size_t row;

    for( row = 0; row < sizeof logoLines / sizeof logoLines[0]; row++ ){
        fputs( logoLines[row], stdout );
    }
}// end of print logo
/* Reports on stdout that the file named by InFile could not be opened. */
void dfPrintOpenError( const char * InFile ){
    fprintf( stdout, "\n ž Error opening file %s\n", InFile );
}// end of print error
/* Reports on stdout that the output file named by OutFile already exists. */
void dfPrintExist( const char * OutFile ){
    fprintf( stdout, "\n ž Output file %s Exist \n", OutFile );
}// end of print Exist
/*
 * Rewrites Buffer in place, turning HTML markup into plain text.
 *
 * iPointer is the read position (the character under inspection is
 * Buffer[iPointer-1]); iPos is the write position. Recognised tags are
 * replaced by a single character, other tags are dropped, and character
 * references after '&' are replaced by one or more characters.
 * The static iStart flag records "currently inside a tag" and therefore
 * survives from one call to the next.
 *
 * NOTE(review): in this copy the entity patterns passed to dfCheckChar look
 * HTML-decoded (e.g. the literal "<" is declared with length 4) and the
 * literal written as three double quotes does not compile - the original
 * entity names need to be restored; confirm against the upstream source.
 * NOTE(review): iLen is decremented once per iteration while iPointer is
 * advanced by several characters after a replacement, so the loop looks
 * able to keep reading past the terminator - confirm.
 */
void dfConvert( char * Buffer ){
static int iStart=0;
int iPos,iPointer;
int iLen=strlen( Buffer );
iPos=iPointer=0;
while( iLen-->0 ){
iPointer++;
// BlockQuote: opening and closing tags both become a double quote
if( dfCheckChar( iLen, Buffer, iPointer, "<BLOCKQUOTE>", 12 ) ){
Buffer[iPos++]='"';
iPointer+=11;
continue;
}
if( dfCheckChar( iLen, Buffer, iPointer, "</BLOCKQUOTE>", 13 ) ){
Buffer[iPos++]='"';
iPointer+=12;
continue;
}
// LineBreak: replaced by a carriage return (0x0d)
if( dfCheckChar( iLen, Buffer, iPointer, "<BR>", 4 ) ){
Buffer[iPos++]=0x0d;
iPointer+=3;
continue;
}
if( dfCheckChar( iLen, Buffer, iPointer, "</BR>", 5 ) ){
Buffer[iPos++]=0x0d;
iPointer+=4;
continue;
}
// Citation: replaced by a double quote
if( dfCheckChar( iLen, Buffer, iPointer, "<CITE>", 6 ) ){
Buffer[iPos++]='"';
iPointer+=5;
continue;
}
if( dfCheckChar( iLen, Buffer, iPointer, "</CITE>", 7 ) ){
Buffer[iPos++]='"';
iPointer+=6;
continue;
}
// Table cell: replaced by a tab (9)
if( dfCheckChar( iLen, Buffer, iPointer, "<TD>", 4 ) ){
Buffer[iPos++]=9;
iPointer+=3;
continue;
}
if( dfCheckChar( iLen, Buffer, iPointer, "</TD>", 5 ) ){
Buffer[iPos++]=9;
iPointer+=4;
continue;
}
// HTML Command Skipper: '<' followed by a letter, '!' or '/' starts a tag
if( Buffer[iPointer-1]=='<' ){
if( Buffer[iPointer]!='\0' ){
if( Buffer[iPointer]>='a' && Buffer[iPointer]<='z' ) iStart=1;
if( Buffer[iPointer]>='A' && Buffer[iPointer]<='Z' ) iStart=1;
if( Buffer[iPointer]=='!' ) iStart=1;
if( Buffer[iPointer]=='/' ) iStart=1;
}
}
// '>' while inside a tag ends the tag; the '>' itself is not copied
if( Buffer[iPointer-1]=='>'&& iStart==1 ) {
iStart=0;
continue;
}
// Outside a tag: decode character references, otherwise copy the character
if( iStart==0 ){
if( Buffer[iPointer-1]=='&' ){
if( dfCheckChar( iLen, Buffer, iPointer, "<" , 4 ) ){ Buffer[iPos++]='<'; iPointer+=3; continue; } //4
if( dfCheckChar( iLen, Buffer, iPointer, ">" , 4 ) ){ Buffer[iPos++]='>'; iPointer+=3; continue; } //4
if( dfCheckChar( iLen, Buffer, iPointer, "&" , 5 ) ){ Buffer[iPos++]='&'; iPointer+=4; continue; } //5
if( dfCheckChar( iLen, Buffer, iPointer, """ , 6 ) ){ Buffer[iPos++]='"'; iPointer+=5; continue; } //6
if( dfCheckChar( iLen, Buffer, iPointer, "Á" , 8 ) ){ Buffer[iPos++]=' '; iPointer+=7; continue; } //8
if( dfCheckChar( iLen, Buffer, iPointer, "À" , 8 ) ){ Buffer[iPos++]='…'; iPointer+=7; continue; } //8
if( dfCheckChar( iLen, Buffer, iPointer, "Â" , 7 ) ){ Buffer[iPos++]='ƒ'; iPointer+=6; continue; } //7
if( dfCheckChar( iLen, Buffer, iPointer, "Ã" , 8 ) ){ Buffer[iPos++]='†'; iPointer+=7; continue; } //8
if( dfCheckChar( iLen, Buffer, iPointer, "Å" , 7 ) ){ Buffer[iPos++]=''; iPointer+=6; continue; } //7
if( dfCheckChar( iLen, Buffer, iPointer, "Ä" , 6 ) ){ Buffer[iPos++]='„'; iPointer+=5; continue; } //6
if( dfCheckChar( iLen, Buffer, iPointer, "Æ" , 7 ) ){ Buffer[iPos++]='’'; iPointer+=6; continue; } //7
if( dfCheckChar( iLen, Buffer, iPointer, "Ç" , 8 ) ){ Buffer[iPos++]='‡'; iPointer+=7; continue; } //8
if( dfCheckChar( iLen, Buffer, iPointer, "É" , 8 ) ){ Buffer[iPos++]='‚'; iPointer+=7; continue; } //8
if( dfCheckChar( iLen, Buffer, iPointer, "È" , 8 ) ){ Buffer[iPos++]='Š'; iPointer+=7; continue; } //8
if( dfCheckChar( iLen, Buffer, iPointer, "Ê" , 7 ) ){ Buffer[iPos++]='ˆ'; iPointer+=6; continue; } //7
if( dfCheckChar( iLen, Buffer, iPointer, "Ë" , 6 ) ){ Buffer[iPos++]='‰'; iPointer+=5; continue; } //6
if( dfCheckChar( iLen, Buffer, iPointer, "Í" , 8 ) ){ Buffer[iPos++]='”'; iPointer+=7; continue; } //8
if( dfCheckChar( iLen, Buffer, iPointer, "Ì" , 8 ) ){ Buffer[iPos++]=''; iPointer+=7; continue; } //8
if( dfCheckChar( iLen, Buffer, iPointer, "Î" , 7 ) ){ Buffer[iPos++]='Œ'; iPointer+=6; continue; } //7
if( dfCheckChar( iLen, Buffer, iPointer, "Ï" , 6 ) ){ Buffer[iPos++]='‹'; iPointer+=5; continue; } //6
if( dfCheckChar( iLen, Buffer, iPointer, "Ð" , 5 ) ){ Buffer[iPos++]='Ń'; iPointer+=4; continue; } //5
if( dfCheckChar( iLen, Buffer, iPointer, "Ñ" , 8 ) ){ Buffer[iPos++]='¤'; iPointer+=7; continue; } //8
if( dfCheckChar( iLen, Buffer, iPointer, "Ó" , 8 ) ){ Buffer[iPos++]='¢'; iPointer+=7; continue; } //8
if( dfCheckChar( iLen, Buffer, iPointer, "Ò" , 8 ) ){ Buffer[iPos++]='•'; iPointer+=7; continue; } //8
if( dfCheckChar( iLen, Buffer, iPointer, "Ô" , 7 ) ){ Buffer[iPos++]='“'; iPointer+=6; continue; } //7
if( dfCheckChar( iLen, Buffer, iPointer, "Õ" , 8 ) ){ Buffer[iPos++]='”'; iPointer+=7; continue; } //8
if( dfCheckChar( iLen, Buffer, iPointer, "Ö" , 6 ) ){ Buffer[iPos++]='”'; iPointer+=5; continue; } //6
if( dfCheckChar( iLen, Buffer, iPointer, "Ø" , 8 ) ){ Buffer[iPos++]='0'; iPointer+=7; continue; } //8
if( dfCheckChar( iLen, Buffer, iPointer, "Ú" , 8 ) ){ Buffer[iPos++]='£'; iPointer+=7; continue; } //8
if( dfCheckChar( iLen, Buffer, iPointer, "Ù" , 8 ) ){ Buffer[iPos++]='—'; iPointer+=7; continue; } //8
if( dfCheckChar( iLen, Buffer, iPointer, "Û" , 7 ) ){ Buffer[iPos++]='–'; iPointer+=6; continue; } //7
if( dfCheckChar( iLen, Buffer, iPointer, "Ü" , 6 ) ){ Buffer[iPos++]=''; iPointer+=5; continue; } //6
if( dfCheckChar( iLen, Buffer, iPointer, "Ý" , 8 ) ){ Buffer[iPos++]='Y'; iPointer+=7; continue; } //8
if( dfCheckChar( iLen, Buffer, iPointer, "Þ" , 7 ) ){ Buffer[iPos++]='č'; iPointer+=6; continue; } //7
if( dfCheckChar( iLen, Buffer, iPointer, "ß" , 7 ) ){ Buffer[iPos++]='į'; iPointer+=6; continue; } //7
if( dfCheckChar( iLen, Buffer, iPointer, "á" , 8 ) ){ Buffer[iPos++]=' '; iPointer+=7; continue; } //8
if( dfCheckChar( iLen, Buffer, iPointer, "à" , 8 ) ){ Buffer[iPos++]='…'; iPointer+=7; continue; } //8
if( dfCheckChar( iLen, Buffer, iPointer, "â" , 7 ) ){ Buffer[iPos++]='ƒ'; iPointer+=6; continue; } //7
if( dfCheckChar( iLen, Buffer, iPointer, "ã" , 8 ) ){ Buffer[iPos++]='†'; iPointer+=7; continue; } //8
if( dfCheckChar( iLen, Buffer, iPointer, "å" , 7 ) ){ Buffer[iPos++]=''; iPointer+=6; continue; } //7
if( dfCheckChar( iLen, Buffer, iPointer, "ä" , 6 ) ){ Buffer[iPos++]='„'; iPointer+=5; continue; } //6
if( dfCheckChar( iLen, Buffer, iPointer, "æ" , 7 ) ){ Buffer[iPos++]='‘'; iPointer+=6; continue; } //7
if( dfCheckChar( iLen, Buffer, iPointer, "ç" , 8 ) ){ Buffer[iPos++]='‡'; iPointer+=7; continue; } //8
if( dfCheckChar( iLen, Buffer, iPointer, "é" , 8 ) ){ Buffer[iPos++]='‚'; iPointer+=7; continue; } //8
if( dfCheckChar( iLen, Buffer, iPointer, "è" , 8 ) ){ Buffer[iPos++]='Š'; iPointer+=7; continue; } //8
if( dfCheckChar( iLen, Buffer, iPointer, "ê" , 7 ) ){ Buffer[iPos++]='ˆ'; iPointer+=6; continue; } //7
if( dfCheckChar( iLen, Buffer, iPointer, "ë" , 6 ) ){ Buffer[iPos++]='‰'; iPointer+=5; continue; } //6
if( dfCheckChar( iLen, Buffer, iPointer, "í" , 8 ) ){ Buffer[iPos++]='”'; iPointer+=7; continue; } //8
if( dfCheckChar( iLen, Buffer, iPointer, "ì" , 8 ) ){ Buffer[iPos++]=''; iPointer+=7; continue; } //8
if( dfCheckChar( iLen, Buffer, iPointer, "î" , 7 ) ){ Buffer[iPos++]='Œ'; iPointer+=6; continue; } //7
if( dfCheckChar( iLen, Buffer, iPointer, "ï" , 6 ) ){ Buffer[iPos++]='‹'; iPointer+=5; continue; } //6
if( dfCheckChar( iLen, Buffer, iPointer, "ð" , 5 ) ){ Buffer[iPos++]='Ń'; iPointer+=4; continue; } //5
if( dfCheckChar( iLen, Buffer, iPointer, "ñ" , 8 ) ){ Buffer[iPos++]='¤'; iPointer+=7; continue; } //8
if( dfCheckChar( iLen, Buffer, iPointer, "ó" , 8 ) ){ Buffer[iPos++]='¢'; iPointer+=7; continue; } //8
if( dfCheckChar( iLen, Buffer, iPointer, "ò" , 8 ) ){ Buffer[iPos++]='•'; iPointer+=7; continue; } //8
if( dfCheckChar( iLen, Buffer, iPointer, "ô" , 7 ) ){ Buffer[iPos++]='“'; iPointer+=6; continue; } //7
if( dfCheckChar( iLen, Buffer, iPointer, "õ" , 8 ) ){ Buffer[iPos++]='”'; iPointer+=7; continue; } //8
if( dfCheckChar( iLen, Buffer, iPointer, "ö" , 6 ) ){ Buffer[iPos++]='”'; iPointer+=5; continue; } //6
if( dfCheckChar( iLen, Buffer, iPointer, "ø" , 8 ) ){ Buffer[iPos++]='0'; iPointer+=7; continue; } //8
if( dfCheckChar( iLen, Buffer, iPointer, "ú" , 8 ) ){ Buffer[iPos++]='£'; iPointer+=7; continue; } //8
if( dfCheckChar( iLen, Buffer, iPointer, "ù" , 8 ) ){ Buffer[iPos++]='—'; iPointer+=7; continue; } //8
if( dfCheckChar( iLen, Buffer, iPointer, "û" , 7 ) ){ Buffer[iPos++]='–'; iPointer+=6; continue; } //7
if( dfCheckChar( iLen, Buffer, iPointer, "ü" , 6 ) ){ Buffer[iPos++]=''; iPointer+=5; continue; } //6
if( dfCheckChar( iLen, Buffer, iPointer, "ý" , 8 ) ){ Buffer[iPos++]='Y'; iPointer+=7; continue; } //8
if( dfCheckChar( iLen, Buffer, iPointer, "þ" , 7 ) ){ Buffer[iPos++]='č'; iPointer+=6; continue; } //7
if( dfCheckChar( iLen, Buffer, iPointer, "ÿ" , 6 ) ){ Buffer[iPos++]='˜'; iPointer+=5; continue; } //6
if( dfCheckChar( iLen, Buffer, iPointer, "®" , 5 ) ){ Buffer[iPos++]='(';
Buffer[iPos++]='r';
Buffer[iPos++]=')'; iPointer+=4; continue; } //5
if( dfCheckChar( iLen, Buffer, iPointer, "©" , 6 ) ){ Buffer[iPos++]='(';
Buffer[iPos++]='c';
Buffer[iPos++]=')'; iPointer+=5; continue; } //6
if( dfCheckChar( iLen, Buffer, iPointer, "™" , 7 ) ){ Buffer[iPos++]='t';
Buffer[iPos++]='m'; iPointer+=6; continue; } //7
if( dfCheckChar( iLen, Buffer, iPointer, " " , 6 ) ){ Buffer[iPos++]=' '; iPointer+=5; continue; } //6
// &#number: numeric character reference
if( Buffer[iPointer]=='#' ){
// Count the decimal digits that follow the '#'
int nCount = 0;
while( iLen-(nCount+1)>0 && // I have char?
Buffer[iPointer+1+nCount]>='0' && // Are number ?
Buffer[iPointer+1+nCount]<='9' ){
nCount++;
}
// If there are digits, try to convert them
if( nCount>0 ){
int nDmm = 0;
int nChar = 0;
int nMul = 1;
// Accumulate the value from the last digit to the first
while( nDmm<nCount ){
nChar += (Buffer[iPointer+nCount-nDmm]-48)*nMul;
// NOTE(review): the next two printf calls look like leftover debug output - confirm
printf( "%d\n", Buffer[iPointer+nCount-nDmm]-48 );
printf( "%d\n", nMul );
nMul *= 10;
nDmm++;
}
// A non-zero value is stored as a single char (no range check)
if( nChar>0 ){
Buffer[iPos++]=nChar;
iPointer+=nDmm+1;
continue;
}
}
}
}
// Two consecutive 0x0d characters: both are skipped, neither is copied
if( Buffer[iPointer-1]==0x0d &&
Buffer[iPointer ]==0x0d ){
iPointer++;
continue;
}
// Default: copy the current character to the write position
Buffer[iPos++]=Buffer[iPointer-1];
}
}
// Terminate the rewritten string
Buffer[iPos++]='\0';
}
/*
 * Tests whether the pattern Check occurs in Buffer starting at index
 * iPointer-1.
 *
 * The test is only attempted when iLen+1 >= iCheckLen. A buffer character
 * matches a pattern character when it is equal to it, or becomes equal to
 * it after setting bit 0x20 (so an upper-case buffer letter matches a
 * lower-case pattern letter, but not the other way round).
 *
 * Returns 1 when the whole pattern matched, or when matching stopped on a
 * pattern character that is CR (13), ';' or ' '; returns 0 otherwise.
 */
int dfCheckChar( int iLen,
char *Buffer,
int iPointer,
char *Check,
int iCheckLen ){
    const char *pattern = Check;
    int at = iPointer - 1;

    if( iLen+1 < iCheckLen ){
        return 0;
    }

    for( ; *pattern != '\0'; ++pattern, ++at ){
        if( Buffer[at] != *pattern && (Buffer[at]|32) != *pattern ){
            break;
        }
    }

    switch( *pattern ){
        case '\0':
        case 13:
        case ';':
        case ' ':
            return 1;
        default:
            return 0;
    }
}
Matrixbob
23-04-2007, 18:13
Ho accantonato quella perché pensavo che la mia fosse meglio, ma anche la mia s'impianta.
[NB]
Quello che segue è solo la parte che dà problemi del mio programma.
/*
 * Copies the text file `infile` to `outfile`, passing every chunk read by
 * fgets() through mod_escaped_html() before writing it.
 *
 * Returns  0 on success,
 *         -1 if `infile` cannot be opened,
 *         -2 if `outfile` cannot be created,
 *         -3 if the working buffer cannot be allocated.
 * Both files are closed on every path that opened them.
 */
int crea_mod_file(char *infile, char *outfile)
{
    FILE *fp_infile, *fp_outfile;
    char *tmp_string, *new_string;

    if((fp_infile = fopen(infile, "r")) == NULL)
    {
        printf("\nError opening file %s.\n", infile);
        return -1;
    }
    if((fp_outfile = fopen(outfile, "w")) == NULL)
    { // create file
        printf("\nError opening file %s.\n", outfile);
        fclose(fp_infile); /* was leaked on this path */
        return -2;
    }

    /* calloc gives the zero-filled buffer the original built with malloc+memset. */
    tmp_string = calloc(1, SIZEBUF);
    if(tmp_string == NULL)
    {
        printf("\nOut of memory.\n");
        fclose(fp_infile);
        fclose(fp_outfile);
        return -3;
    }

    while(fgets(tmp_string, (SIZEBUF-1), fp_infile) != NULL)
    {
        /* NOTE(review): whether mod_escaped_html() returns tmp_string itself
           or a newly allocated string is not visible here; if it allocates,
           the result must be released by its owner - confirm. */
        new_string = mod_escaped_html(tmp_string);
        fputs(new_string, fp_outfile);
        /* Clear the whole buffer before the next read, as the original did. */
        memset(tmp_string, 0, SIZEBUF);
    }

    free(tmp_string);
    fclose(fp_infile);
    fclose(fp_outfile);
    return 0;
}
/******************************/
/*
 * Normalizes the characters of a string.
 *
 * Applies subst_string() repeatedly, in the fixed order below; each call
 * receives the result of the previous one, and the last result is returned.
 *
 * NOTE(review): subst_string() is not visible here, so who owns the
 * returned pointer (same buffer or a new allocation) must be confirmed.
 * NOTE(review): in this copy the search patterns look HTML-decoded (for
 * example "<" is replaced by "<", which does nothing) and the pattern
 * written as three double quotes is not a valid literal - presumably the
 * originals were entity names; confirm against the real source.
 */
char *mod_escaped_html(char *tmp_buffer)
{
tmp_buffer = subst_string(tmp_buffer, "<", "<");
tmp_buffer = subst_string(tmp_buffer, ">", ">");
tmp_buffer = subst_string(tmp_buffer, " ", " ");
tmp_buffer = subst_string(tmp_buffer, "à", "ą");
tmp_buffer = subst_string(tmp_buffer, "è", "č");
tmp_buffer = subst_string(tmp_buffer, "é", "é");
tmp_buffer = subst_string(tmp_buffer, "ì", "ģ");
tmp_buffer = subst_string(tmp_buffer, "ò", "ņ");
tmp_buffer = subst_string(tmp_buffer, "ù", "ł");
tmp_buffer = subst_string(tmp_buffer, "«", "<");
tmp_buffer = subst_string(tmp_buffer, "»", ">");
tmp_buffer = subst_string(tmp_buffer, ">", ">");
tmp_buffer = subst_string(tmp_buffer, "°", "°");
tmp_buffer = subst_string(tmp_buffer, "&", "&");
tmp_buffer = subst_string(tmp_buffer, "’", "'");
tmp_buffer = subst_string(tmp_buffer, "'", "'");
tmp_buffer = subst_string(tmp_buffer, """, "\"");
return tmp_buffer;
}
vBulletin® v3.6.4, Copyright ©2000-2025, Jelsoft Enterprises Ltd.