/* * Created on Mar 15, 2005 * * TODO To change the template for this generated file go to * Window - Preferences - Java - Code Style - Code Templates */ package cc.glsn; import java.io.File; import java.io.FileInputStream; import java.util.ArrayList; /** * @author fireduck * * TODO To change the template for this generated type comment go to * Window - Preferences - Java - Code Style - Code Templates */ public class HTMLTableExtractor { public boolean RemoveBraces; public HTMLTableExtractor() { RemoveBraces=true; } /** * * @param HTMLDoc * @return An arraylist of html table objects */ public ArrayList extract(String HTMLDoc) { ArrayList Output=new ArrayList(); Stato S=new Stato(HTMLDoc,0,HTMLDoc.length()); while(true) { ArrayList newTable=extractTable(S); if (newTable==null) return Output; Output.add(newTable); } } public ArrayList extractFile(String Filename) throws java.io.IOException { File F=new File(Filename); int len=new Long(F.length()).intValue(); byte Buff[]=new byte[len]; FileInputStream FIS=new FileInputStream(F); int r=0; while(r=S.end) return null; int eidx=S.Str.indexOf(End,sidx); if (eidx<0) return null; if (eidx>=S.end) return null; Stato S2=new Stato(S.Str,sidx,eidx); S.start=eidx+End.length(); return S2; } private ArrayList extractTable(Stato S) { Stato TableData=extractEntry(S,""); if (TableData==null) return null; ArrayList Table=new ArrayList(); while(true) { ArrayList Row=extractTableRow(TableData); if (Row==null) return Table; Table.add(Row); } } private ArrayList extractTableRow(Stato S) { Stato RowData=extractEntry(S,""); if (RowData==null) return null; ArrayList Row=new ArrayList(); while(true) { String D=extractTableData(RowData); if (D==null) return Row; Row.add(D); } } private String extractTableData(Stato S) { Stato TDA=extractEntry(S,""); Stato TDB=extractEntry(S,""); Stato TD=TDA; if ((TD==null) || ((TDB != null) && (TDB.start < TDA.start))) { TD=TDB; } if (TD==null) return null; String strTD=TD.Str.substring(TD.start,TD.end); if (RemoveBraces) { strTD=removeBraces(strTD); } //System.out.println("Record: " + strTD); return strTD; } private String removeBraces(String S) { int sidx=S.indexOf("<"); int eidx=S.indexOf(">"); if ((sidx >=0) && (sidx < eidx)) { String S2=S.substring(0,sidx) + S.substring(eidx+1); return removeBraces(S2); } return S; } private class Stato { int start; int end; String Str; public Stato(String H, int Start, int End) { Str=H; start=Start; end=End; if (end-start < 2048) { Str=H.substring(Start,End); start=0; end=Str.length(); } } } }