automation of data format conversion to parent child format
This is an excel sheet which has only a single column filled for each row. (explanation : all CITY categories fall under V21 , all handset categories fall under CityJ and so on )
V21
CITYR
CITYJ
HandsetS
HandsetHW
HandsetHA
LOWER_AGE<=20
LOWER_AGE>20
SMS_COUNT<=0
RECHARGE_MRP<=122
RECHARGE_MRP>122
SMS_COUNT>0
I need to change this format to a double column开发者_开发问答 format with parent and child category format. therefore the output sheet would be
V21 CITYR
V21 CITYJ
CITYJ HandsetS
CITYJ HandsetHW
CITYJ HandsetHA
HandsetHA LOWER_AGE<=20
HandsetHA LOWER_AGE>20
LOWER_AGE>20 SMS_COUNT<=0
SMS_COUNT<=0 RECHARGE_MRP<=122
SMS_COUNT<=0 RECHARGE_MRP>122
LOWER_AGE>20 SMS_COUNT>0
the datas are huge so i cant do them manually . how can i automate this ?
There are 3 parts of the task so I want to know what is that you are asking help about.
- Reading excel sheet data into Java
- Manipulating data
- Writing data back into the excel sheet.
You have said that the data sheet is large and cannot be pulled as a whole into memory. Can I ask you how many top level elements do you have ? i.e, How many V21s do you have? If it is just ONE, then how many CITYR/CITYJ do you have?
--
Adding some source code from my previous answer about how to manipulate data. I gave it an input file which was separated by tabs (4 spaces equals to one column for you in excel) and the following code printed stuff out neatly. Please note that there is a condition of level == 1 left empty. If you think ur JVM has too many objects, you could clear the entries and stack at that point :)
package com.ekanathk;
import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Stack;
import java.util.logging.Logger;
import org.junit.Test;
class Entry {
private String input;
private int level;
public Entry(String input, int level) {
this.input = input;
this.level = level;
}
public String getInput() {
return input;
}
public int getLevel() {
return level;
}
@Override
public String toString() {
return "Entry [input=" + input + ", level=" + level + "]";
}
}
public class Tester {
private static final Logger logger = Logger.getLogger(Tester.class.getName());
@SuppressWarnings("unchecked")
@Test
public void testSomething() throws Exception {
InputStream is = Thread.currentThread().getContextClassLoader().getResourceAsStream("samplecsv.txt");
BufferedReader b = new BufferedReader(new InputStreamReader(is));
String input = null;
List entries = new ArrayList();
Stack<Entry> stack = new Stack<Entry>();
stack.push(new Entry("ROOT", -1));
while((input = b.readLine()) != null){
int level = whatIsTheLevel(input);
input = input.trim();
logger.info("input = " + input + " at level " + level);
Entry entry = new Entry(input, level);
if(level == 1) {
//periodically clear out the map and write it to another excel sheet
}
if (stack.peek().getLevel() == entry.getLevel()) {
stack.pop();
}
Entry parent = stack.peek();
logger.info("parent = " + parent);
entries.add(new String[]{parent.getInput(), entry.getInput()});
stack.push(entry);
}
for(Object entry : entries) {
System.out.println(Arrays.toString((String[])entry));
}
}
private int whatIsTheLevel(String input) {
int numberOfSpaces = 0;
for(int i = 0 ; i < input.length(); i++) {
if(input.charAt(i) != ' ') {
return numberOfSpaces/4;
} else {
numberOfSpaces++;
}
}
return numberOfSpaces/4;
}
}
This considers that you have a file small enough to fit in computer memory. Even 10MB file should be good.
It has 2 parts:
DataTransformer which does all the required transformation of data
TreeNode is custom simple Tree data structure
public class DataTransformer {
public static void main(String[] args) throws IOException {
InputStream in = DataTransformer.class
.getResourceAsStream("source_data.tab");
BufferedReader br = new BufferedReader(
new InputStreamReader(in));
String line;
TreeNode root = new TreeNode("ROOT", Integer.MIN_VALUE);
TreeNode currentNode = root;
while ((line = br.readLine()) != null) {
int level = getLevel(line);
String value = line.trim();
TreeNode nextNode = new TreeNode(value, level);
relateNextNode(currentNode, nextNode);
currentNode = nextNode;
}
printAll(root);
}
public static int getLevel(String line) {
final char TAB = '\t';
int numberOfTabs = 0;
for (int i = 0; i < line.length(); i++) {
if (line.charAt(i) != TAB) {
break;
}
numberOfTabs++;
}
return numberOfTabs;
}
public static void relateNextNode(
TreeNode currentNode, TreeNode nextNode) {
if (currentNode.getLevel() < nextNode.getLevel()) {
currentNode.addChild(nextNode);
} else {
relateNextNode(currentNode.getParent(), nextNode);
}
}
public static void printAll(TreeNode node) {
if (!node.isRoot() && !node.getParent().isRoot()) {
System.out.println(node);
}
for (TreeNode childNode : node.getChildren()) {
printAll(childNode);
}
}
}
class TreeNode implements Serializable {
private static final long serialVersionUID = 1L;
private TreeNode parent;
private List<TreeNode> children = new ArrayList<TreeNode>();
private String value;
private int level;
public TreeNode(String value, int level) {
this.value = value;
this.level = level;
}
public void addChild(TreeNode child) {
child.parent = this;
this.children.add(child);
}
public void addSibbling(TreeNode sibbling) {
TreeNode parent = this.parent;
parent.addChild(sibbling);
}
public TreeNode getParent() {
return parent;
}
public List<TreeNode> getChildren() {
return children;
}
public String getValue() {
return value;
}
public int getLevel() {
return level;
}
public boolean isRoot() {
return this.parent == null;
}
public String toString() {
String str;
if (this.parent != null) {
str = this.parent.value + '\t' + this.value;
} else {
str = this.value;
}
return str;
}
}
精彩评论