/*
 * 나중에 사용할 일이 생길 것 같아서...
 * 메모리 할당이 가능한 선에서는 메모리에서 처리하고, 메모리 처리가 안 되면
 * 속도 저하가 심하지만 파일에서 라인 단위로 처리하는 방식.
 * 가정 : 기본적으로 입력받는 디렉토리 아래에 있는 파일은 모두 정렬이 돼 있어야 한다.
 */
#include <unistd.h>
#include <sys/types.h>
#include <stdlib.h>
#include <dirent.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <string.h>
#include <stdio.h>
#include <netdb.h>
#include <arpa/inet.h>
/*
 * Append src to the NUL-terminated string at dst, copying at most *limit
 * characters.
 *
 * *limit is decremented once per character copied, so the same counter can
 * be threaded through a series of appends into one buffer.  The result is
 * always NUL-terminated; the terminator itself is not charged against
 * *limit, so the destination needs room for one byte beyond the budget.
 *
 * Returns a pointer to the new terminating NUL (not to dst), which lets the
 * caller continue appending from there without rescanning.  Returns dst
 * unchanged when src is NULL, and NULL when dst is NULL.
 *
 * Note: despite the name, this is not the BSD strlcat(); both the signature
 * and the return value differ.
 */
char *strlcat( char *dst, char *src, int *limit)
{
    char *tail;

    if (src == NULL)
        return dst;
    if (dst == NULL)
        return NULL;

    /* Walk to the terminator of whatever is already stored in dst. */
    tail = dst;
    while (*tail != '\0')
        tail++;

    /* Copy until the source ends or the shared budget is used up. */
    while (*src != '\0' && *limit > 0) {
        *tail++ = *src++;
        --*limit;
    }

    *tail = '\0';
    return tail;
}
/*
 * Return the next non-empty '\n'-delimited line of the NUL-terminated text
 * at *cursor, or NULL when no more lines remain.
 *
 * Mirrors strtok_r(..., "\n", ...): runs of newlines are skipped (so empty
 * lines never appear), the delimiter is overwritten with NUL, and *cursor is
 * advanced past it.  In addition the line length is stored in *len so the
 * caller does not have to rescan the line.
 */
static char *mergefile_mem_next_line( char **cursor, size_t *len )
{
    char *start = *cursor;
    char *end;

    while (*start == '\n')
        start++;
    if (*start == '\0') {
        *cursor = start;
        return NULL;
    }

    for (end = start; *end != '\0' && *end != '\n'; end++)
        ;
    *len = (size_t)(end - start);
    if (*end == '\n')
        *end++ = '\0';
    *cursor = end;
    return start;
}

/*
 * Merge two sorted text files into a third, doing all the work in memory.
 *
 * Both inputs are read completely, split into lines, and merged in strcmp()
 * order.  A line that occurs in both inputs at the same merge position is
 * written once.  Empty lines are dropped and every output line is terminated
 * by '\n', even if the input's last line was not.
 *
 * fn1, fn2: paths of the sorted input files (must be non-empty).
 * fn3:      path of the output file; it is only created once the merged
 *           data is ready.
 *
 * Returns 0 on success, or a negative code identifying the failing step so
 * the caller can fall back to a streaming merge:
 *   -1 / -2   fn1 / fn2 cannot be stat()ed or is empty
 *   -3 / -4   input buffer for fn1 / fn2 cannot be allocated
 *   -5        output buffer cannot be allocated
 *   -6 / -7   fn1 / fn2 cannot be opened
 *   -8        fn3 cannot be opened for writing
 *   -9 / -10  short read on fn1 / fn2
 *   -11       writing or closing fn3 failed
 * All buffers and streams are released on every path.
 */
int mergefile_mem( char *fn1, char *fn2, char *fn3 )
{
    struct stat stat1, stat2;
    FILE *f1 = NULL, *f2 = NULL, *fw = NULL;
    char *buffer1 = NULL, *buffer2 = NULL, *buffer3 = NULL;
    char *cur1, *cur2;
    char *line1, *line2;
    size_t size1, size2, nread;
    size_t len1 = 0, len2 = 0, outlen = 0;
    /* Each input can gain at most one '\n' (unterminated last line). */
    const size_t max_total = (size_t)-1 - 2;
    int cmp;
    int close_rc;
    int rc;

    if (stat(fn1, &stat1) == -1 || stat1.st_size <= 0)
        return -1;
    if (stat(fn2, &stat2) == -1 || stat2.st_size <= 0)
        return -2;

    /* Refuse sizes that do not fit in size_t instead of truncating them. */
    size1 = (size_t)stat1.st_size;
    size2 = (size_t)stat2.st_size;
    if ((off_t)size1 != stat1.st_size)
        return -3;
    if ((off_t)size2 != stat2.st_size)
        return -4;
    if (size1 > max_total || size2 > max_total - size1)
        return -5;

    rc = -3;
    if ((buffer1 = malloc(size1 + 1)) == NULL)
        goto done;
    rc = -4;
    if ((buffer2 = malloc(size2 + 1)) == NULL)
        goto done;
    rc = -5;
    if ((buffer3 = malloc(size1 + size2 + 2)) == NULL)
        goto done;

    rc = -6;
    if ((f1 = fopen(fn1, "r")) == NULL)
        goto done;
    rc = -7;
    if ((f2 = fopen(fn2, "r")) == NULL)
        goto done;

    nread = fread(buffer1, 1, size1, f1);
    if (nread != size1) {
        fprintf(stderr, "cannt read file[%lu:%lu]\n",
                (unsigned long)size1, (unsigned long)nread);
        rc = -9;
        goto done;
    }
    buffer1[size1] = '\0';

    nread = fread(buffer2, 1, size2, f2);
    if (nread != size2) {
        fprintf(stderr, "cannt read file[%lu:%lu]\n",
                (unsigned long)size2, (unsigned long)nread);
        rc = -10;
        goto done;
    }
    buffer2[size2] = '\0';

    cur1 = buffer1;
    cur2 = buffer2;
    line1 = mergefile_mem_next_line(&cur1, &len1);
    line2 = mergefile_mem_next_line(&cur2, &len2);

    while (line1 != NULL || line2 != NULL) {
        if (line1 == NULL)
            cmp = 1;
        else if (line2 == NULL)
            cmp = -1;
        else
            cmp = strcmp(line1, line2);

        /* Only the sign of strcmp() is meaningful, never its magnitude. */
        if (cmp <= 0) {
            memcpy(buffer3 + outlen, line1, len1);
            outlen += len1;
            buffer3[outlen++] = '\n';
            if (cmp == 0)   /* same line in both inputs: emit it once */
                line2 = mergefile_mem_next_line(&cur2, &len2);
            line1 = mergefile_mem_next_line(&cur1, &len1);
        } else {
            memcpy(buffer3 + outlen, line2, len2);
            outlen += len2;
            buffer3[outlen++] = '\n';
            line2 = mergefile_mem_next_line(&cur2, &len2);
        }
    }

    rc = -8;
    if ((fw = fopen(fn3, "w")) == NULL)
        goto done;

    /* Write exactly the merged bytes, not the combined input size. */
    rc = -11;
    if (outlen > 0 && fwrite(buffer3, 1, outlen, fw) != outlen)
        goto done;
    close_rc = fclose(fw);  /* fclose flushes; a failure means lost data */
    fw = NULL;
    if (close_rc != 0)
        goto done;

    rc = 0;

done:
    if (fw != NULL)
        fclose(fw);
    if (f2 != NULL)
        fclose(f2);
    if (f1 != NULL)
        fclose(f1);
    free(buffer3);
    free(buffer2);
    free(buffer1);
    return rc;
}
/*
 * Read one line with fgets() into buf (capacity size).
 *
 * If the line came back without a trailing '\n' and there is room for one
 * (i.e. it is the unterminated last line of the file, not a chunk of an
 * over-long line), a '\n' is appended so that the line compares and prints
 * like every other line.  Returns buf, or NULL at end of file / on error.
 */
static char *mergefile_read_line( char *buf, int size, FILE *fp )
{
    size_t len;

    if (fgets(buf, size, fp) == NULL)
        return NULL;

    len = strlen(buf);
    if (len > 0 && buf[len - 1] != '\n' && len + 1 < (size_t)size) {
        buf[len] = '\n';
        buf[len + 1] = '\0';
    }
    return buf;
}

/*
 * Merge two sorted text files into a third, one line at a time.
 *
 * This is the low-memory counterpart of an in-memory merge: only one line
 * per input is held at any moment.  Lines are compared with strcmp()
 * (including their trailing newline); a line present in both inputs at the
 * same merge position is written once.  Lines longer than the internal
 * buffer are processed in buffer-sized pieces.
 *
 * fn1, fn2: paths of the sorted input files.
 * fn3:      path of the output file (created or truncated).
 *
 * Returns 0 on success and -1 if a file cannot be opened or an I/O error
 * occurs.  All streams are closed on every path.
 */
int mergefile( char *fn1, char *fn2, char *fn3 )
{
    enum { MERGEFILE_LINE_BUF = 10240 };
    FILE *f1 = NULL, *f2 = NULL, *fw = NULL;
    char buffer1[MERGEFILE_LINE_BUF];
    char buffer2[MERGEFILE_LINE_BUF];
    char *line1, *line2;
    int cmp;
    int close_rc;
    int rc = -1;

    if ((f1 = fopen(fn1, "r")) == NULL)
        goto done;
    if ((f2 = fopen(fn2, "r")) == NULL)
        goto done;
    if ((fw = fopen(fn3, "w")) == NULL)
        goto done;

    line1 = mergefile_read_line(buffer1, MERGEFILE_LINE_BUF, f1);
    line2 = mergefile_read_line(buffer2, MERGEFILE_LINE_BUF, f2);

    while (line1 != NULL || line2 != NULL) {
        if (line1 == NULL)
            cmp = 1;
        else if (line2 == NULL)
            cmp = -1;
        else
            cmp = strcmp(line1, line2);

        /* Only the sign of strcmp() is meaningful, never its magnitude. */
        if (cmp <= 0) {
            if (fputs(line1, fw) == EOF)
                goto done;
            if (cmp == 0)   /* same line in both inputs: emit it once */
                line2 = mergefile_read_line(buffer2, MERGEFILE_LINE_BUF, f2);
            line1 = mergefile_read_line(buffer1, MERGEFILE_LINE_BUF, f1);
        } else {
            if (fputs(line2, fw) == EOF)
                goto done;
            line2 = mergefile_read_line(buffer2, MERGEFILE_LINE_BUF, f2);
        }
    }

    /* fgets() returns NULL for both EOF and errors; tell them apart. */
    if (ferror(f1) || ferror(f2))
        goto done;

    close_rc = fclose(fw);  /* fclose flushes; a failure means lost data */
    fw = NULL;
    if (close_rc != 0)
        goto done;

    rc = 0;

done:
    if (fw != NULL)
        fclose(fw);
    if (f2 != NULL)
        fclose(f2);
    if (f1 != NULL)
        fclose(f1);
    return rc;
}
/*
 * Merge every sorted input file in base_dir into a single "final.squid".
 *
 * The process changes its working directory to base_dir and then works in
 * rounds.  Round 0 selects regular entries whose name contains ".fin"; round
 * N > 0 selects the intermediate files of round N-1 (names containing
 * "<N-1, 4 digits>.mid.").  Within a round the selected files are taken in
 * alphasort() order and merged pairwise into "<round>.mid.<seq>"; both
 * inputs of a pair are deleted after a successful merge.  An unpaired last
 * file is renamed into the next intermediate slot.  When a round selected
 * at most two files, its single output is renamed to "final.squid".
 *
 * Each pair is first merged with mergefile_mem(); if that fails the
 * streaming mergefile() is tried.
 *
 * base_dir: directory holding the input files.
 *
 * Returns 0 on success and -1 if the directory cannot be scanned, no input
 * file matches, a pair cannot be merged, or a rename fails.  If chdir()
 * fails the process exits with status 1.
 */
int merge_files( char *base_dir )
{
    int i;
    int ret;
    int suffix;
    int prefix;
    int numberoffiles;
    int target_file_num;
    int have_output = 0;    /* set once filename names a real file */
    int failed = 0;
    char *name;
    char *first;
    char *second;
    struct dirent **items;
    struct stat st;
    char pattern[256];
    char filename[1024];

    if (chdir(base_dir) < 0)
    {
        fprintf(stderr,"[%s:%d]\tchdir=(%s)\n", __FUNCTION__,__LINE__,base_dir );
        perror("chdir : ");
        exit(1);
    }

    prefix = 0;
    for (;;)
    {
        numberoffiles = scandir(".", &items, NULL, alphasort);
        if (numberoffiles < 0)
        {
            perror("scandir : ");
            return -1;
        }

        target_file_num = 0;
        suffix = 0;
        first = NULL;
        second = NULL;
        fprintf( stderr,"file num = %d\n", numberoffiles );

        if (prefix == 0)
            snprintf(pattern, sizeof pattern, "%s", ".fin");
        else
            snprintf(pattern, sizeof pattern, "%04d.mid.", prefix - 1);

        for (i = 0; i < numberoffiles; i++)
        {
            name = items[i]->d_name;
            if (strcmp(name, ".") == 0 || strcmp(name, "..") == 0)
                continue;
            if (lstat(name, &st) != 0)
                continue;   /* vanished or unreadable: nothing to merge */
            if (S_ISDIR(st.st_mode))
                continue;
            if (strstr(name, pattern) == NULL)
                continue;

            target_file_num++;
            if (first == NULL)
            {
                first = name;
                continue;
            }
            second = name;

            /*
             * The working directory already is base_dir, so the output is
             * named relative to it; prefixing base_dir again would break
             * for a relative base_dir.
             */
            snprintf(filename, sizeof filename, "%04d.mid.%03d", prefix, suffix++);
            have_output = 1;
            fprintf( stderr,"[%d]\t%s : %s\n", i, first, second );
            if ((ret = mergefile_mem(first, second, filename)) != 0)
            {
                if ((ret = mergefile(first, second, filename)) != 0)
                {
                    fprintf( stderr,"cannot merge files(%s,%s)[%d]\n", first, second, ret );
                    failed = 1;
                    break;
                }
            }
            if (unlink(first) != 0)
                perror("cannot remove first file : ");
            if (unlink(second) != 0)
                perror("cannot remove second file : ");
            first = NULL;
            second = NULL;
        }

        /* Odd file out: carry it into the next round under a new name. */
        if (!failed && first != NULL)
        {
            snprintf(filename, sizeof filename, "%04d.mid.%03d", prefix, suffix++);
            have_output = 1;
            if (rename(first, filename) != 0)
            {
                /* Keep the source: deleting it here would lose its data. */
                perror("cannot rename first file : ");
                failed = 1;
            }
        }

        /* first/second pointed into items, so release it only now. */
        for (i = 0; i < numberoffiles; i++)
            free(items[i]);
        free(items);

        if (failed)
            return -1;

        prefix++;
        /*
         * Two or fewer selected files leave exactly one output (or none),
         * so another round could never make progress.
         */
        if (target_file_num <= 2)
            break;
    }

    if (!have_output)
    {
        fprintf(stderr, "no input files to merge\n");
        return -1;
    }
    if (rename(filename, "final.squid") != 0)
    {
        perror("cannot rename final file : ");
        return -1;
    }
    return 0;
}
int main()
{
merge_files( "/data2/jchern/data" );