Tuesday, November 20, 2012

Split a large text file in Java

In some cases, large files have to be split into smaller ones for faster processing. In my case, the file contained about 10 million records. Processing all 10 million records from a single file is slower than processing several smaller files in parallel.

Split.java can be used to create files with a predefined number of lines.

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;

/**
 * Splits a large text file into consecutive smaller files, each holding a
 * fixed maximum number of lines.
 *
 * <p>Output files are named after the input file with a running number
 * appended, starting at 1 (for example {@code data.txt1}, {@code data.txt2}).
 */
public class Split {

    /** Line terminator written after every line of every output file. */
    private final static String NEWLINE = System.getProperty("line.separator");

    /**
     * Reads {@code filename} line by line and writes it out as a sequence of
     * chunk files containing {@code lines} lines each; the final chunk holds
     * whatever lines remain.
     *
     * <p>Every line is written with the platform line separator, regardless of
     * the terminator used in the input. An empty input produces no output
     * files.
     *
     * @param filename path of the file to split; chunk files are created as
     *                 {@code filename + n} for n = 1, 2, ...
     * @param lines    maximum number of lines per chunk; values below 1 behave
     *                 like 1 (one line per file)
     * @throws IOException if the input cannot be opened or read
     *                     ("read file error ...") or a chunk cannot be written
     *                     ("write file error ...")
     */
    public static void readFileData(String filename, int lines) throws IOException {
        try (BufferedReader reader = openReader(filename)) {
            StringBuilder chunk = new StringBuilder();
            int buffered = 0;
            int counter = 1;

            String line;
            while ((line = nextLine(reader, filename)) != null) {
                chunk.append(line).append(NEWLINE);
                buffered++;
                if (buffered >= lines) {
                    createFile(chunk, filename + counter);
                    chunk.setLength(0);
                    buffered = 0;
                    counter++;
                }
            }

            // Flush the trailing partial chunk; without this the last
            // (total % lines) lines of the input would be dropped.
            if (buffered > 0) {
                createFile(chunk, filename + counter);
            }
        }
    }

    /** Opens the input file, reporting failures as a "read file error". */
    private static BufferedReader openReader(String filename) throws IOException {
        try {
            return new BufferedReader(new FileReader(filename));
        } catch (IOException e) {
            throw new IOException("read file error " + filename, e);
        }
    }

    /** Reads the next line (null at end of input), reporting failures as a "read file error". */
    private static String nextLine(BufferedReader reader, String filename) throws IOException {
        try {
            return reader.readLine();
        } catch (IOException e) {
            throw new IOException("read file error " + filename, e);
        }
    }

    /**
     * Writes {@code content} to the file named {@code filename}, replacing any
     * existing file of that name.
     *
     * <p>A relative name is resolved against the current working directory; an
     * absolute name is used as given.
     *
     * @throws IOException if the file cannot be created or written
     */
    private static void createFile(StringBuilder content, String filename) throws IOException {
        File file = new File(filename).getAbsoluteFile();
        try (FileWriter output = new FileWriter(file)) {
            output.write(content.toString());
        } catch (IOException e) {
            throw new IOException("write file error " + file.getPath(), e);
        }
        System.out.println("file " + file.getPath() + " written");
    }

    /**
     * Command-line entry point.
     *
     * <p>Usage: {@code java Split [file [linesPerChunk]]}. When omitted, the
     * file defaults to {@code /usr/sujith/filename.txt} and the chunk size to
     * 1000000 lines.
     */
    public static void main(String[] args) {

        String fileName = args.length > 0 ? args[0] : "/usr/sujith/filename.txt";
        int lines = 1000000;

        if (args.length > 1) {
            try {
                lines = Integer.parseInt(args[1]);
            } catch (NumberFormatException e) {
                System.err.println("invalid line count: " + args[1]);
                System.exit(2);
            }
        }

        try {
            readFileData(fileName, lines);
        } catch (IOException e) {
            e.printStackTrace();
            System.exit(1);
        }
    }
}