package au.gov.amsa.ihs.reader;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.PrintStream;
import java.util.Arrays;
import java.util.Map;
import java.util.TreeSet;

import com.github.davidmoten.rx.Checked;

import rx.Observable;
import rx.functions.Action1;
import rx.functions.Func0;
import rx.functions.Func1;

/**
 * Command-line utility that reads a set of XML files and writes their records
 * as tab-delimited text files, one output file per extraction.
 */
public class ExtractAllTabDelimitedMain {

    /** Separator written between columns of the output. */
    private static final String DELIMITER = "\t";

    /** Extension expected on input file names. */
    private static final String INPUT_EXTENSION = ".xml";

    /** Extension given to output file names. */
    private static final String OUTPUT_EXTENSION = ".txt";

    /**
     * Returns a cold observable of the records read from {@code file}. The
     * file is opened on each subscription and closed when that subscription
     * terminates or is unsubscribed.
     *
     * @param file
     *            file to read
     * @param parentElementName
     *            passed to {@link OperatorIhsReader}; presumably the name of
     *            the element that delimits one record (confirm against that
     *            class)
     * @return the maps emitted by {@link OperatorIhsReader} for the file
     */
    public static Observable<Map<String, String>> extractMaps(File file, String parentElementName) {
        Func0<InputStream> resourceFactory = Checked.f0(() -> new FileInputStream(file));
        Func1<InputStream, Observable<Map<String, String>>> observableFactory = is -> Observable
                .just(is).lift(new OperatorIhsReader(parentElementName));
        Action1<InputStream> disposeAction = Checked.a1(is -> is.close());
        return Observable.using(resourceFactory, observableFactory, disposeAction);
    }

    /**
     * Writes the records of all {@code files} to {@code output} as
     * tab-delimited text. The first line holds the sorted union of all keys
     * found in any record; each following line holds one record's values in
     * that key order, with an empty field where a record has no value.
     *
     * <p>
     * The input files are read twice: once to collect the keys and once to
     * write the rows.
     *
     * @param parentElementName
     *            element name forwarded to {@link #extractMaps(File, String)}
     * @param output
     *            file to write (overwritten if it exists)
     * @param files
     *            input files, processed in the order given
     * @throws RuntimeException
     *             wrapping the {@link IOException} if the output cannot be
     *             opened or written
     */
    public static void writeDelimited(String parentElementName, File output, File... files) {
        Observable<Map<String, String>> o = Observable.from(files)
                .flatMap(file -> extractMaps(file, parentElementName));
        // first pass: gather the sorted union of keys across every record
        TreeSet<String> keys = o
                .collect(() -> new TreeSet<String>(), (set, map) -> set.addAll(map.keySet()))
                .toBlocking().single();
        System.out.println(keys);
        // try-with-resources so the stream is closed even if the second pass
        // throws part way through
        try (PrintStream out = new PrintStream(output)) {
            out.println(String.join(DELIMITER, keys));
            // second pass: re-subscribes, so the files are read again.
            // NOTE(review): assumes emission is synchronous on the calling
            // thread (no scheduler is applied in this class) - confirm for
            // OperatorIhsReader.
            o.forEach(map -> out.println(toRow(keys, map)));
            // PrintStream swallows IOExceptions; surface them explicitly
            if (out.checkError()) {
                throw new IOException("Failed writing delimited output to " + output);
            }
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Builds one delimited line holding the value of each key in iteration
     * order, using an empty field for keys with no (or a null) value.
     */
    private static String toRow(Iterable<String> keys, Map<String, String> map) {
        StringBuilder row = new StringBuilder();
        boolean isFirst = true;
        for (String key : keys) {
            if (!isFirst) {
                row.append(DELIMITER);
            }
            String value = map.get(key);
            if (value != null) {
                row.append(value);
            }
            isFirst = false;
        }
        return row.toString();
    }

    /**
     * Runs the extractions for a hard-coded input directory, writing the
     * results into the {@code target} directory.
     *
     * @param args
     *            ignored
     */
    public static void main(String[] args) throws FileNotFoundException, IOException {
        File directory = new File("/media/an/ship-data/ihs/608750-2015-04-01/");
        File output = new File("target");

        // combined output of both ShipData files, named after the first
        extract(directory, output, "ShipData", "ShipData.xml", "ShipData1.xml");
        extract(directory, output, "ShipData", "ShipData1.xml");
        extractSimple(directory, output, "tblBuilderAndSubcontractorLinkFile");
        extractSimple(directory, output, "tblClassCodes");
        extractSimple(directory, output, "tblCompanyDetailsAll");
        extractSimple(directory, output, "tblCompanyFullDetailsWithCodesAndParent");
        extractSimple(directory, output, "tblEngineBuilderCodes");
        extractSimple(directory, output, "tblEngineDesignerCodes");
        extractSimple(directory, output, "tblFlagCodes");
        extractSimple(directory, output, "tblFlagHistory");
        extractSimple(directory, output, "tblHullTypeCodes");
        extractSimple(directory, output, "tblNameHistory");
        extractSimple(directory, output, "tblPandICodes");
        extractSimple(directory, output, "tblPortOfRegistryFullCodes");
        extractSimple(directory, output, "tblPropulsionTypeDecode");
        extractSimple(directory, output, "tblShipTypeCodes");
        extractSimple(directory, output, "tblStatusCodes");
    }

    /**
     * Extracts a single file whose name is the parent element name plus
     * {@code .xml}.
     */
    private static void extractSimple(File directory, File outputDirectory, String parentElement) {
        extract(directory, outputDirectory, parentElement, parentElement + INPUT_EXTENSION);
    }

    /**
     * Extracts the named files from {@code directory} into one delimited file
     * in {@code outputDirectory}, named after the first input file.
     *
     * @throws IllegalArgumentException
     *             if no file names are given
     */
    private static void extract(File directory, File outputDirectory, String parentElement,
            String... names) {
        if (names.length == 0) {
            throw new IllegalArgumentException("at least one input file name is required");
        }
        File[] files = Arrays.stream(names).map(name -> new File(directory, name))
                .toArray(File[]::new);
        writeDelimited(parentElement, new File(outputDirectory, toOutputName(names[0])), files);
    }

    /**
     * Derives the output file name from an input file name by swapping a
     * trailing {@code .xml} for {@code .txt}. Only the extension is touched,
     * so "xml" elsewhere in the name is preserved. Names without the expected
     * extension simply get {@code .txt} appended.
     */
    private static String toOutputName(String inputName) {
        if (inputName.endsWith(INPUT_EXTENSION)) {
            String base = inputName.substring(0, inputName.length() - INPUT_EXTENSION.length());
            return base + OUTPUT_EXTENSION;
        }
        return inputName + OUTPUT_EXTENSION;
    }
}