diff --git a/build.xml b/build.xml new file mode 100644 index 0000000..896733c --- /dev/null +++ b/build.xml @@ -0,0 +1,203 @@ + + + + Build Babel. + Also used by Hudson Babel project. + JUnit test is available for this build. + $Id: build_Babel.xml $ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/babel/spanningtrees/Cognate.java b/src/babel/spanningtrees/Cognate.java new file mode 100644 index 0000000..711a10c --- /dev/null +++ b/src/babel/spanningtrees/Cognate.java @@ -0,0 +1,12 @@ +package babel.spanningtrees; + +import java.util.ArrayList; +import java.util.List; + +public class Cognate { + int GlossID; + int MultistateCode; + List languages = new ArrayList(); + List word = new ArrayList(); + List edges = new ArrayList(); // encoded as (0,1) (0,3) entries in languages list +} diff --git a/src/babel/spanningtrees/CognateData.java b/src/babel/spanningtrees/CognateData.java new file mode 100644 index 0000000..226a82e --- /dev/null +++ b/src/babel/spanningtrees/CognateData.java @@ -0,0 +1,233 @@ +package babel.spanningtrees; + +import java.io.File; +import java.text.DecimalFormat; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +public class CognateData { + + // map GlossID to Gloss + Map glosses; + // map GlossID to map of MultistateCode to Cognate + Map> cognateGlossMap; + + String getGloss(int GlossID) { + return glosses.get(GlossID); + } + + Map getCognates(int GlossID) { + return cognateGlossMap.get(GlossID); + } + + void loadCognateData(String fileName) throws Exception { + System.err.println("Loading " + fileName); 
+ glosses = new HashMap(); + cognateGlossMap = new HashMap>(); + + List entries = CognateIO.readCognates(new File(fileName)); + + for (Entry entry : entries) { + glosses.put(entry.GlossID, entry.Gloss); + if (cognateGlossMap.containsKey(entry.GlossID)) { + Map cognateMap = cognateGlossMap.get(entry.GlossID); + if (cognateMap.containsKey(entry.MultistateCode)) { + Cognate cognate = cognateMap.get(entry.MultistateCode); + cognate.languages.add(entry.Language); + cognate.word.add(entry.Word); + } else { + Cognate cognate = new Cognate(); + cognate.GlossID = entry.GlossID; + cognate.MultistateCode = entry.MultistateCode; + cognate.languages.add(entry.Language); + cognate.word.add(entry.Word); + cognateMap.put(entry.MultistateCode, cognate); + } + } else { + Map cognateMap = new HashMap(); + Cognate cognate = new Cognate(); + cognate.GlossID = entry.GlossID; + cognate.MultistateCode = entry.MultistateCode; + cognate.languages.add(entry.Language); + cognate.word.add(entry.Word); + cognateMap.put(entry.MultistateCode, cognate); + cognateGlossMap.put(entry.GlossID, cognateMap); + } + } + } + + void calcSpanningTrees(Map locations) { + for (Map cognateMap : cognateGlossMap.values()) { + List cognates = new ArrayList(); + cognates.addAll(cognateMap.values()); + for (Cognate cognate : cognates) { + List splitCognates = calcSpanningTree(cognate, locations, cognateMap.keySet()); + for (Cognate c : splitCognates) { + cognateMap.put(c.MultistateCode, c); + } + } + } + } + + + + private List calcSpanningTree(Cognate cognate, Map locations, Set MultistateCodes) { + List splitCognates = new ArrayList(); + if (cognate.MultistateCode == 0) { + // missing data + splitCognates.add(cognate); + return splitCognates; + } + // collect locations associated with languages + List locs = new ArrayList(); + for (String language : cognate.languages) { + Location loc = locations.get(language); + if (loc == null) { + loc = locations.get(language); + } + locs.add(loc); + } + // create distance 
matrix + double [][] dist = new double[locs.size()][locs.size()]; + for (int i = 0; i < locs.size(); i++) { + for (int j = i+1; j < locs.size(); j++) { + dist[i][j] = distance(locs.get(i), locs.get(j)); +// double averageLong = (locs.get(i).longitude + locs.get(j).longitude) / 2.0; +// double delta = 500 * (averageLong - 113.7199742531)/(153.5701790672 - 113.7199742531); +// dist[i][j] += delta; +// dist[j][i] = dist[i][j]; + } + } + // find spanning tree + int[] group = new int[locs.size()]; + for (int i = 0; i < group.length; i++) { + group[i] = i; + } + double minDist = 0; + boolean progress = true; + +// longitude latitude +// max 153.5701790672 -9.9422964683 +// min 113.7199742531 -38.471266 +// distance(x,y) = GreatCircleDistance(x,y) + 500 * (1.0-(long(x)+long(y))/2 - 113.7199742531)/(153.5701790672-113.7199742531)) + + for (int i = 0; i < group.length-1 && progress; i++) { + progress = false; + // find minimum distance + minDist = Double.MAX_VALUE; + int min1 = -1; int min2 = -1; + for (int x = 0; x < group.length; x++) { + for (int y = x + 1; y < group.length; y++) { + if (group[x] != group[y] && dist[x][y] < minDist) { + minDist = dist[x][y]; + min1 = x; + min2 = y; + } + + } + } + if (minDist < CognateIO.COGNATE_SPLIT_THRESHOLD) { + progress = true; + int g = group[min2]; + for (int x = 0; x < group.length; x++) { + if (group[x] == g) { + group[x] = group[min1]; + } + } + cognate.edges.add(min1); + cognate.edges.add(min2); + } + } + // assemble cognates + if (minDist < CognateIO.COGNATE_SPLIT_THRESHOLD) { + splitCognates.add(cognate); + return splitCognates; + } + + Cognate[] splits = new Cognate[group.length]; + //MultistateCodes.remove(cognate.MultistateCode); + Set dup = new HashSet(); + dup.addAll(MultistateCodes); + dup.remove(cognate.MultistateCode); + for (int i = 0; i < group.length; i++) { + if (splits[group[i]] == null) { + splits[group[i]] = new Cognate(); + splits[group[i]].GlossID = cognate.GlossID; + splits[group[i]].MultistateCode = 
cognate.MultistateCode; + while (dup.contains(splits[group[i]].MultistateCode)) { + splits[group[i]].MultistateCode++; + } + Integer code = splits[group[i]].MultistateCode; + dup.add(code); + splitCognates.add(splits[group[i]]); + } + splits[group[i]].languages.add(cognate.languages.get(i)); + splits[group[i]].word.add(cognate.word.get(i)); + } + for (int i = 0; i < cognate.edges.size(); i+= 1) { + int edge = cognate.edges.get(i); + int groupID = group[edge]; + String language = cognate.languages.get(edge); + int edgeID = splits[groupID].languages.indexOf(language); + splits[groupID].edges.add(edgeID); + } + + // print to stdout +// DecimalFormat format = new DecimalFormat("####"); +// System.out.print(cognate.GlossID + " " + getGloss(cognate.GlossID) + " " + cognate.MultistateCode + " " + format.format(minDist) + " " + splitCognates.size()); +// for (Cognate c: splitCognates) { +// System.out.print(" ("); +// for (int k = 0; k < c.languages.size(); k++) { +// System.out.print(c.languages.get(k) + " " + c.word.get(k++)); +// if (k < c.languages.size() - 1) { +// System.out.print(", "); +// } +// } +// System.out.print(")"); +// } +// System.out.println(); + + + DecimalFormat format = new DecimalFormat("####"); + System.out.print(cognate.GlossID + " " + getLink(splitCognates.get(0).word.get(0)) + " " + format.format(minDist) + " " + splitCognates.size()); + for (Cognate c: splitCognates) { + System.out.print(" ("); + for (int k = 0; k < c.languages.size(); k++) { + System.out.print(c.languages.get(k)); + if (k < c.languages.size() - 1) { + System.out.print(", "); + } + } + System.out.print(")"); + } + System.out.println(); + return splitCognates; + } + + private String getLink(String word) { + String link = "" + strs[0] + " " + strs[2] + ""; + return link; + } + + static public double distance(Location location, Location location2) { + // great circle distance between the two locations (latitude/longitude are converted from degrees to radians; 6371.01 is presumably the mean earth radius in km, so the result would be in km) + double fLat1 = location.latitude; + double fLong1 = location.longitude; + double fLat2 = 
location2.latitude; + double fLong2 = location2.longitude; + fLat1 *= Math.PI/180.0; + fLat2 *= Math.PI/180.0; + fLong1 *= Math.PI/180.0; + fLong2 *= Math.PI/180.0; + double fDist = 6371.01*Math.acos(Math.max(-1.0, Math.min(1.0, Math.sin(fLat1)*Math.sin(fLat2) + Math.cos(fLat1)*Math.cos(fLat2)*Math.cos(fLong1-fLong2)))); /* clamp to [-1,1]: rounding can push the cosine marginally out of range (e.g. for identical points) and Math.acos would then return NaN */ + return fDist; + } + +} diff --git a/src/babel/spanningtrees/CognateIO.java b/src/babel/spanningtrees/CognateIO.java new file mode 100644 index 0000000..9f62165 --- /dev/null +++ b/src/babel/spanningtrees/CognateIO.java @@ -0,0 +1,344 @@ +package babel.spanningtrees; + +import java.awt.Color; +import java.io.BufferedOutputStream; +import java.io.BufferedReader; +import java.io.File; +import java.io.FileOutputStream; +import java.io.FileReader; +import java.io.PrintStream; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Random; +import java.util.Set; +import java.util.Vector; + +import javax.xml.parsers.DocumentBuilderFactory; +import javax.xml.xpath.XPath; +import javax.xml.xpath.XPathConstants; +import javax.xml.xpath.XPathFactory; + +import org.w3c.dom.Node; +import org.w3c.dom.NodeList; + +public class CognateIO { + //final static public String KML_FILE = "locations.kml"; + final static public String KML_FILE = "/home/remco/data/beast/aboriginal/pny10/pny.kml"; + final static public String BG_FILE = "bg(-41.47,110.72)x(-6.94,156.57).png"; + final static public String NEXUSFILE = "filtered.nex"; + //final static public String DATAFILE = "cognates7recoded.dat"; + //public static final int NTAX = 194; + //public static final int NGLOSSIDS = 205; + final static public String DATAFILE = "/home/remco/data/beast/aboriginal/pny10/pny10.tab"; + public static final int NTAX = 299; + public static int NGLOSSIDS = 184; + static Map l2l = new HashMap(); + + // if FILL_IN_MISSING_DATA = true, mark those less than THRESHOLD distance as missing, + // and keep the remainder at zero + final 
static boolean FILL_IN_MISSING_DATA = true; + + // if there are equal or less than MINIMUM_COGNATE_SIZE + // in a cognate, do not output cognate to NEXUSFILE + final static int MINIMUM_COGNATE_SIZE = 1; + + //final static double MISSING_DATA_THRESHOLD = CognateData.THRESHOLD; + final static double MISSING_DATA_THRESHOLD = 1000; + + + static double COGNATE_SPLIT_THRESHOLD = 750.0; + + + public static List readCognates(File file) throws Exception { + List entries = new ArrayList(); + BufferedReader fin = new BufferedReader(new FileReader(file)); + String sStr = null; + // eat up header + sStr = fin.readLine(); + while (fin.ready()) { + sStr = fin.readLine(); + String [] strs = sStr.split("\t"); + if (strs.length != 1) { + Entry entry = new Entry(); + entry.GlossID = Integer.parseInt(strs[0]); + entry.Gloss = strs[1]; + entry.Subgroup = strs[2]; + entry.Language = strs[3]; + l2l.put(entry.Language.replaceAll("[-_ '`’\\\\]", ""), entry.Language); + entry.Language = entry.Language.replaceAll("[-_ '`’\\\\]", ""); + entry.Word = strs[4]; + entry.MultistateCode = Integer.parseInt(strs[5]); + entries.add(entry); + } + } + fin.close(); + return entries; + } + + public static void writeCognates(File file, List entries) throws Exception { + PrintStream out = new PrintStream(new BufferedOutputStream(new FileOutputStream(file))); + out.println("GlossID Gloss Subgroup Language Word MultistateCode"); + for (Entry entry : entries) { + out.println(entry.GlossID+ "\t" +entry.Gloss+ "\t" +entry.Subgroup+ "\t" +entry.Language+ "\t" +entry.Word+ "\t" +entry.MultistateCode); + } + out.close(); + } + + + public static void writeCognatesToNexus(File file, Map> cognateGlossMap, Map locations) throws Exception { + List languages = new ArrayList(); + Map cognateMap = cognateGlossMap.get(1); + for (Cognate cognate : cognateMap.values()) { + for (String language : cognate.languages) { + languages.add(language); + } + } + + Map mapLanguageToSequence = new HashMap(); + for (int i = 0; i < 
languages.size(); i++) { + mapLanguageToSequence.put(languages.get(i), i); + } + List [] sequences = new List[languages.size()]; + for (int i = 0; i < languages.size(); i++) { + sequences[i] = new ArrayList(); + } + + int [] count = new int [250]; + for (Map cognatemap : cognateGlossMap.values()) { + Set missing = new HashSet(); + for (Cognate cognate : cognatemap.values()) { + if (cognate.MultistateCode == 0) { + missing.addAll(cognate.languages); + break; + } + } + for (Cognate cognate : cognatemap.values()) { + + if (cognate.MultistateCode > 0 && cognate.languages.size() > MINIMUM_COGNATE_SIZE) { + List cognateLocations = new ArrayList(); + for (String language : cognate.languages) { + cognateLocations.add(locations.get(language)); + } + + count[cognate.languages.size()]++; + if (cognate.languages.size() > 50) { + System.err.println(cognate.languages.size() + " " + cognate.GlossID + " " + cognate.MultistateCode); + } + + int [] code = new int[sequences.length]; + // mark those less than THRESHOLD distance as missing, + // and keep the remainder at zero + if (FILL_IN_MISSING_DATA) { + for (String language : missing) { + Location loc1 = locations.get(language); + if (loc1 != null) { + double minDist = Double.MAX_VALUE; + for (Location loc2 : cognateLocations) { + double dist = CognateData.distance(loc1, loc2); + minDist = Math.min(minDist, dist); + } + if (minDist < MISSING_DATA_THRESHOLD) { + code[mapLanguageToSequence.get(language)] = -1; + } + } + } + +// // mark zero's as 'death' if sufficiently far away +// for (String language : missing) { +// code[mapLanguageToSequence.get(language)] = -1; +// } +// for (int i = 0; i < code.length; i++) { +// if (code[i] == 0) { +// String language = languages.get(i); +// Location loc1 = locations.get(language); +// if (loc1 != null) { +// double minDist = Double.MAX_VALUE; +// for (Location loc2 : cognateLocations) { +// double dist = CognateData.distance(loc1, loc2); +// minDist = Math.min(minDist, dist); +// } +// if 
(minDist >= MISSING_DATA_THRESHOLD) { +// code[mapLanguageToSequence.get(language)] = -2; +// } +// } +// } +// } + + + + } else { + for (String language : missing) { + code[mapLanguageToSequence.get(language)] = -1; + } + } + for (String language : cognate.languages) { + code[mapLanguageToSequence.get(language)] = 1; + } + for (int i = 0; i < languages.size(); i++) { + sequences[i].add(code[i]); + } + } + } + } + + for (int i = 0; i < 100; i++) { + System.err.println(i + " " + count[i]); + } + + PrintStream out = new PrintStream(new BufferedOutputStream(new FileOutputStream(file))); + out.println("#NEXUS"); + out.println("Begin data;"); + out.println("Dimensions ntax=" + languages.size() +" nchar=" + sequences[0].size() +";"); + out.println("Format datatype=binary symbols=\"01\" gap=-;"); + out.println("Matrix"); + for (int i = 0; i < languages.size(); i++) { + out.print("\"" + l2l.get(languages.get(i)).replace(" ", "") + "\" "); + for (int j = 0; j < sequences[0].size(); j++) { + switch (sequences[i].get(j)) { + case -1 : + out.print('-'); + break; + case -2 : + out.print('2'); + break; + case 0 : + out.print('0'); + break; + case 1 : + out.print('1'); + break; + default: + out.print(sequences[i].get(j)); + break; + } + } + out.println(); + } + out.println("End;"); + out.close(); + } + + + /** grabs placemarks out of kml files: returns a map from placemark name to a Location built from the first coordinate pair parsed for that placemark (first number stored as longitude, second as latitude) plus a colour; if anything throws, the stack trace is printed and the entries gathered so far are returned **/ + static public Map loadKMLFile(String sFileName) { + Random rand = new Random(10); + + System.err.println("Loading " + sFileName); + Map map = new HashMap(); + try { + DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); + factory.setValidating(false); + org.w3c.dom.Document doc = factory.newDocumentBuilder().parse(new File(sFileName)); + doc.normalize(); + + // grab styles out of the KML file + HashMap mapStyleToColor = new HashMap(); + NodeList oStyles = doc.getElementsByTagName("Style"); + for (int iNode = 0; iNode < oStyles.getLength(); iNode++) { + Node oStyle = oStyles.item(iNode); + String sID = 
oStyle.getAttributes().getNamedItem("id").getTextContent(); /* NOTE(review): getNamedItem returns null for a Style without an id attribute, which would throw here and end up in the catch at the end of this method */ + XPath xpath = XPathFactory.newInstance().newXPath(); + String expression = ".//IconStyle/color"; + Node oColor = (Node) xpath.evaluate(expression, oStyles.item(iNode), XPathConstants.NODE); + if (oColor != null) { + String sColor = oColor.getTextContent(); + sColor = sColor.substring(2); + Integer nColor = Integer.parseInt(sColor, 16); + + nColor = 0x0000FF + rand.nextInt(0xFFFF) * 0xFF; + mapStyleToColor.put(sID, nColor); + } + } + + // grab polygon info from placemarks + NodeList oPlacemarks = doc.getElementsByTagName("Placemark"); + for (int iNode = 0; iNode < oPlacemarks.getLength(); iNode++) { + String sPlacemarkName = ""; + Vector nX = new Vector(); + Vector nY = new Vector(); + Node node = oPlacemarks.item(iNode); + NodeList oChildren = node.getChildNodes(); + long color = 0x808080; + for (int iChild = 0; iChild < oChildren.getLength(); iChild++) { + Node oChild = oChildren.item(iChild); + if (oChild.getNodeType() == Node.ELEMENT_NODE) { + String sName = oChild.getNodeName(); + if (sName.equals("name")) { + sPlacemarkName = oChild.getTextContent(); + } else if (sName.equals("Style")) { + String expression = ".//PolyStyle/color"; + XPath xpath = XPathFactory.newInstance().newXPath(); + Node oColor = (Node) xpath.evaluate(expression, oChild, XPathConstants.NODE); /* query the placemark's own Style child; the original passed oStyles.item(iNode), i.e. an unrelated Style picked by placemark index */ + if (oColor != null) { + String sColor = oColor.getTextContent(); + sColor = sColor.substring(2); + color = Integer.parseInt(sColor, 16); + } + } else if (sName.equals("styleUrl")) { + String sID = oChild.getTextContent(); + sID = sID.substring(2); + if (mapStyleToColor.containsKey(sID)) { + color = mapStyleToColor.get(sID); + } + //} else if (sName.equals("description")) { + //sDescription = oChild.getTextContent(); + } else if (sName.equals("Polygon") || sName.equals("Point") || sName.equals("LineString")) { + XPath xpath = XPathFactory.newInstance().newXPath(); + String expression = ".//coordinates"; + Node oCoords = (Node) 
xpath.evaluate(expression, oChild, XPathConstants.NODE); + String sCoord = (oCoords == null) ? "" : oCoords.getTextContent(); /* geometry without a coordinates element yields no points instead of a NullPointerException */ + String [] sCoords = sCoord.split("\\s+"); + for (int i = 0; i < sCoords.length; i++) { + String sStr = sCoords[i]; + String [] sStrs = sStr.split(","); + if (sStrs.length > 1) { + //Point point = new Point(); + try { + nX.add(Double.parseDouble(sStrs[0]));// * Parser.MAX_LATITUDE_INT_UNITS / 360)); + nY.add(Double.parseDouble(sStrs[1]));///180f) * Parser.MAX_LONGITUDE_INT_UNITS)); + } catch (NumberFormatException e) { + System.err.println("Problem with " + sPlacemarkName + " " + e.getMessage()); + } + } + } + } + } + } + if (nX.size() > 0) { + Location poly = new Location(); + poly.latitude = nY.get(0); + poly.longitude = nX.get(0); + color = Math.abs(color); + long r = color%256; + long g = (color/256) % 256; + long b = color/(256*256)% 256; + poly.color = new Color((int)r, (int)g, (int)b); +// System.out.println(sPlacemarkName + " " + poly.color.getRed() + " " +poly.color.getGreen() + " " + poly.color.getBlue() ); + map.put(sPlacemarkName, poly); + } + } + + } catch (Exception e) { + e.printStackTrace(); + } + return map; + } // loadKMLFile + + + public static void main(String[] args) throws Exception { + List entries = CognateIO.readCognates(new File(CognateIO.DATAFILE)); + + int missingCount = 0; + for (Entry entry : entries) { + if (entry.MultistateCode == 0) { + missingCount ++; + } + } + System.err.println(entries.size() + " " + missingCount); + //TCognateIO.writeCognates(new File("x.dat"), entries); + } + +} diff --git a/src/babel/spanningtrees/Entry.java b/src/babel/spanningtrees/Entry.java new file mode 100644 index 0000000..04bf0d9 --- /dev/null +++ b/src/babel/spanningtrees/Entry.java @@ -0,0 +1,10 @@ +package babel.spanningtrees; + +public class Entry { + public int GlossID; + public String Gloss; + public String Subgroup; + public String Language; + public String Word; + public int MultistateCode; +} diff --git a/src/babel/spanningtrees/Location.java 
b/src/babel/spanningtrees/Location.java new file mode 100644 index 0000000..100e26a --- /dev/null +++ b/src/babel/spanningtrees/Location.java @@ -0,0 +1,9 @@ +package babel.spanningtrees; + +import java.awt.Color; + +public class Location { + public double latitude; + public double longitude; + public Color color; +} diff --git a/src/babel/spanningtrees/Panel.java b/src/babel/spanningtrees/Panel.java new file mode 100644 index 0000000..6837a42 --- /dev/null +++ b/src/babel/spanningtrees/Panel.java @@ -0,0 +1,587 @@ +package babel.spanningtrees; + + +import java.awt.AlphaComposite; +import java.awt.BasicStroke; +import java.awt.Color; +import java.awt.Font; +import java.awt.Graphics; +import java.awt.Graphics2D; +import java.awt.event.KeyEvent; +import java.awt.event.KeyListener; +import java.awt.image.BufferedImage; +import java.io.BufferedReader; +import java.io.File; +import java.io.FileOutputStream; +import java.io.FileReader; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Random; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import javax.imageio.ImageIO; +import javax.swing.JFrame; +import javax.swing.JPanel; + +import beast.core.util.Log; + +import com.itextpdf.awt.PdfGraphics2D; +import com.itextpdf.text.pdf.PdfContentByte; +import com.itextpdf.text.pdf.PdfWriter; + +public class Panel extends JPanel implements KeyListener { + private static final long serialVersionUID = 1L; + + int mode = DRAW_GLOSS; + final static int DRAW_ALL_GLOSS = 0; + final static int DRAW_GLOSS = 1; + + BufferedImage m_bgImage; + double [] m_fBGImageBox = new double[4]; + /** extreme values for position information **/ + public float m_fMaxLong, m_fMaxLat, m_fMinLong, m_fMinLat; + + Map locations; + CognateData data; + + int [][] edgecount; + List languages; + + int GlossID = 1; + + public Panel(String [] args) { + parseArgs(args); + addKeyListener(this); + } + + private void 
parseArgs(String[] args) { + CognateIO.COGNATE_SPLIT_THRESHOLD = 3000; + CognateIO.COGNATE_SPLIT_THRESHOLD = 6000; + CognateIO.COGNATE_SPLIT_THRESHOLD = 7000; + CognateIO.NGLOSSIDS = 207; + + int i = 0; + while (i < args.length) { + String arg = args[i]; + switch (arg) { + case "-maxdist": + if (i+1 >= args.length) { + Log.warning.println("-maxdist argument requires another argument"); + printUsageAndExit(); + } + CognateIO.COGNATE_SPLIT_THRESHOLD = Integer.parseInt(args[i+1]); + i += 2; + break; + case "-words": + if (i+1 >= args.length) { + Log.warning.println("-words argument requires another argument"); + printUsageAndExit(); + } + CognateIO.NGLOSSIDS = Integer.parseInt(args[i+1]); + i += 2; + break; + case "-kml": + if (i+1 >= args.length) { + Log.warning.println("-kml argument requires another argument"); + printUsageAndExit(); + } + KML_FILE = args[i+1]; + i += 2; + break; + case "-bg": + if (i+1 >= args.length) { + Log.warning.println("-bg argument requires another argument"); + printUsageAndExit(); + } + BG_FILE = args[i+1]; + i += 2; + break; + case "-nex": + if (i+1 >= args.length) { + Log.warning.println("-nex argument requires another argument"); + printUsageAndExit(); + } + NEXUS_FILE = args[i+1]; + i += 2; + break; + case "-cognates": + if (i+1 >= args.length) { + Log.warning.println("-cognates argument requires another argument"); + printUsageAndExit(); + } + COGNATE_FILE = args[i+1]; + i += 2; + break; + case "-h": + case "-help": + case "--help": + printUsageAndExit(); + break; + default: + Log.warning.println("unrecognised command " + arg); + printUsageAndExit(); + } + } + + } + + private void printUsageAndExit() { + System.out.println("java babel.spanningtree.Panel [options]"); + System.out.println("Draws spanning trees of congates"); + System.out.println("-maxdist maximum allowed distance of branches. 
Any branch over will result in spanning trees being broken up and message being logged."); + System.out.println("-words number of meaning classes (default 207)"); + System.out.println("-kml kml file with point locations for each of the languages"); + System.out.println("-bg image file with world map in Mercator projection"); + System.out.println("-nex specify nexus file with binary data"); + System.out.println("-cognates specify cognate file with labels for each column in the nexus file"); + System.out.println("-h, -help print this message"); + System.exit(0); + } + + void loadBGImage(String sFileName) throws Exception { + m_bgImage = ImageIO.read(new File(sFileName)); + try { + Pattern pattern = Pattern + .compile(".*\\(([0-9\\.Ee-]+),([0-9\\.Ee-]+)\\)x\\(([0-9\\.Ee-]+),([0-9\\.Ee-]+)\\).*"); + Matcher matcher = pattern.matcher(sFileName); + matcher.find(); + m_fBGImageBox[1] = Float.parseFloat(matcher.group(1)); + m_fBGImageBox[0] = Float.parseFloat(matcher.group(2)); + m_fBGImageBox[3] = Float.parseFloat(matcher.group(3)); + m_fBGImageBox[2] = Float.parseFloat(matcher.group(4)); + } catch (Exception e) { + final double[] fBGImageBox = { -180, -90, 180, 90 }; + m_fBGImageBox = fBGImageBox; + } + } // loadBGImage + + @Override + protected void paintComponent(Graphics g) { + super.paintComponent(g); + Graphics2D g2 = (Graphics2D) g; + + + int nW2 = m_bgImage.getWidth(); + int nH2 = m_bgImage.getHeight(); + + int x0 = (int)(nW2 * (m_fMinLong- m_fBGImageBox[0])/(m_fBGImageBox[2] - m_fBGImageBox[0])); + int x1 = (int)(nW2 * (m_fMaxLong- m_fBGImageBox[0])/(m_fBGImageBox[2] - m_fBGImageBox[0])); + int y0 = (int)(nH2 * (m_fMaxLat- m_fBGImageBox[3])/(m_fBGImageBox[1] - m_fBGImageBox[3])); + int y1 = (int)(nH2 * (m_fMinLat- m_fBGImageBox[3])/(m_fBGImageBox[1] - m_fBGImageBox[3])); + + double m_fScaleX = 10; + double m_fScaleY = 10; + int nW = (int) getWidth(); + int nH = (int) getHeight(); + m_fScaleX = (nW + 0.0f) / (m_fMaxLong - m_fMinLong); + m_fScaleY = (nH + 0.0f) / 
(m_fMaxLat - m_fMinLat); + + + g.drawImage(m_bgImage, + 0, 0, getWidth(), getHeight(), + x0, + y0, + x1, + y1, + null); + + g2.setColor(Color.red); + g2.setFont(new Font(Font.DIALOG, Font.BOLD, 20)); + if (GlossID <= CognateIO.NGLOSSIDS) { + g2.drawString(GlossID + ": " + data.getGloss(GlossID), 10, 400); + } + System.err.println(GlossID + ": " + data.getGloss(GlossID)); + + for (String language: locations.keySet()) { + Location loc = locations.get(language); + g.setColor(loc.color); + g.setColor(new Color(0x5050a0)); + int gx = (int) ((loc.longitude - m_fMinLong) * m_fScaleX); + int gy = (int) ((m_fMaxLat - loc.latitude) * m_fScaleY); + g2.setStroke(new BasicStroke(3.0f)); + g.drawOval(gx - 3, gy - 3, 5, 5); + g.drawOval(gx - 5, gy - 5, 10, 10); + g.drawOval(gx - 7, gy - 7, 14, 14); + + g2.setFont(new Font(Font.SANS_SERIF, Font.BOLD, 8)); + //g.drawString(language, gx+10, gy); + } + + + g.setColor(Color.black); + +//final int JITTER = 12; +int JITTER = 8; +switch (mode) { +case DRAW_ALL_GLOSS: + JITTER = 12; + ((Graphics2D) g).setComposite(AlphaComposite.getInstance(AlphaComposite.SRC_OVER, 0.10f)); + g2.setStroke(new BasicStroke(3.0f)); + for (GlossID = 1; GlossID < CognateIO.NGLOSSIDS; GlossID++) { + plot(g2, JITTER, m_fScaleX, m_fScaleY); + } + break; +case DRAW_GLOSS: + g2.setStroke(new BasicStroke(5.0f)); + ((Graphics2D) g).setComposite(AlphaComposite.getInstance(AlphaComposite.SRC_OVER, 1.0f)); + plot(g2, JITTER, m_fScaleX, m_fScaleY); + break; + +} + + +//} + +// g.setColor(Color.black); +// ((Graphics2D) g).setComposite(AlphaComposite.getInstance(AlphaComposite.SRC_OVER, 1.0f)); +// for (int i = 0; i < edgecount.length; i++) { +// for (int j = i; j < edgecount.length; j++) { +// if (edgecount[i][j] > 10) { +// Location loc0 = locations.get(languages.get(i)); +// Location loc1 = locations.get(languages.get(j)); +// x0 = (int) ((loc0.longitude - m_fMinLong) * m_fScaleX); +// y0 = (int) ((m_fMaxLat - loc0.latitude) * m_fScaleY); +// x1 = (int) 
((loc1.longitude - m_fMinLong) * m_fScaleX); +// y1 = (int) ((m_fMaxLat - loc1.latitude) * m_fScaleY); +// g.drawString(edgecount[i][j] + "", (x0+x1)/2, (y0+y1)/2); +// } +// +// } +// } + } + + void plot(Graphics2D g2, int JITTER, double m_fScaleX, double m_fScaleY) { + Map map = data.getCognates(GlossID); + if (map != null) { + + g2.setFont(new Font(Font.DIALOG, Font.BOLD, 14)); + for (Cognate c : map.values()) { + Random rand = new Random();//c.MultistateCode+2); + Color color = new Color(Color.HSBtoRGB( + ((float)c.MultistateCode/map.size()) * ((float)c.MultistateCode/map.size()) * rand.nextFloat(), + 0.5f + rand.nextFloat()/2.0f, + 0.90f)); + color = new Color(rand.nextInt(0xFFFF) * 0xFF | 0x80); + g2.setColor(color); + for (int i = 0; i < c.languages.size(); i++) { + if (c.MultistateCode > 0) { + Location loc = locations.get(c.languages.get(i)); + int x0 = (int) ((loc.longitude - m_fMinLong) * m_fScaleX); + int y0 = (int) ((m_fMaxLat - loc.latitude) * m_fScaleY); + String word = c.word.get(i) + ":" + c.MultistateCode; + //g2.drawString(word, x0, y0); + } + } + + + List edges = c.edges; + for (int i = 0; i < edges.size(); i += 2) { + int p0 = edges.get(i); + int p1 = edges.get(i + 1); + Location loc0 = locations.get(c.languages.get(p0)); + Location loc1 = locations.get(c.languages.get(p1)); + int x0 = (int) ((loc0.longitude - m_fMinLong) * m_fScaleX); + int y0 = (int) ((m_fMaxLat - loc0.latitude) * m_fScaleY); + int x1 = (int) ((loc1.longitude - m_fMinLong) * m_fScaleX); + int y1 = (int) ((m_fMaxLat - loc1.latitude) * m_fScaleY); + x0 += rand.nextInt(JITTER) - JITTER/2; + x1 += rand.nextInt(JITTER) - JITTER/2; + y0 += rand.nextInt(JITTER) - JITTER/2; + y1 += rand.nextInt(JITTER) - JITTER/2; + g2.drawLine(x0, y0, x1, y1); + double dist = CognateData.distance(loc0, loc1); + //g.drawString((dist + " ").substring(0,6) , (x0 + x1)/2, (y0+y1)/2); + } + } + } + } + + + static public String KML_FILE = "/home/remco/data/beast/ie/saskyY3/geo/ie.kml"; + static public 
String BG_FILE = "/home/remco/data/map/IEsmall.png"; // background image handed to loadBGImage() by main()
    //static public String NEXUSFILE = "filtered.nex";
    //final static public String DATAFILE = "cognates7recoded.dat";
    //public static final int NTAX = 194;
    //public static final int NGLOSSIDS = 205;
    /** Nexus alignment that main() passes to {@link #loadData(String, String)}. */
    static public String NEXUS_FILE = "/home/remco/data/beast/ie/saskyY3/geo/IELex.nex";
    /** Cognate label file that main() passes to {@link #loadData(String, String)}. */
    static public String COGNATE_FILE = "/home/remco/data/beast/ie/saskyY3/geo/cognates.dat";


    /**
     * Loads the locations from KML_FILE and derives the bounding box of all
     * of them, widened by 3 on every side.
     */
    void loadLocations() {
        locations = CognateIO.loadKMLFile(KML_FILE);

        // start from an inverted box so the first location initialises all four bounds
        m_fMinLat = 90;
        m_fMinLong = 180;
        m_fMaxLat = -90;
        m_fMaxLong = -180;
        for (Location loc : locations.values()) {
            m_fMinLat = Math.min(m_fMinLat, (float) loc.latitude);
            m_fMaxLat = Math.max(m_fMaxLat, (float) loc.latitude);
            m_fMinLong = Math.min(m_fMinLong, (float) loc.longitude);
            m_fMaxLong = Math.max(m_fMaxLong, (float) loc.longitude);
        }

        // margin around the outermost locations
        float fOffset = 3f;
        m_fMaxLong = m_fMaxLong + fOffset;
        m_fMaxLat = m_fMaxLat + fOffset;
        m_fMinLong = m_fMinLong - fOffset;
        m_fMinLat = m_fMinLat - fOffset;
    }

    /**
     * Builds the cognate data from a nexus alignment plus a cognate label
     * file, calculates the spanning trees over the loaded locations, and
     * counts how often each pair of languages is joined by an edge.
     * Leaves GlossID at 1.
     *
     * @param nexusFile   nexus file whose matrix has one row per language
     * @param cognateFile file with one label line per matrix column
     * @throws Exception if either file cannot be read, or the nexus file has
     *                   no line containing "matrix"
     */
    void loadData(final String nexusFile, final String cognateFile) throws Exception {
        data = new CognateData() {
            /**
             * Groups the entries produced by readCognates() by GlossID and
             * MultistateCode. The argument is only used in the log message;
             * the files actually read are the ones given to loadData().
             */
            @Override
            void loadCognateData(String fileName) throws Exception {
                System.err.println("Loading " + fileName);
                glosses = new HashMap<>();
                cognateGlossMap = new HashMap<>();

                for (Entry entry : readCognates()) {
                    glosses.put(entry.GlossID, entry.Gloss);

                    Map<Integer, Cognate> cognateMap = cognateGlossMap.get(entry.GlossID);
                    if (cognateMap == null) {
                        cognateMap = new HashMap<>();
                        cognateGlossMap.put(entry.GlossID, cognateMap);
                    }

                    Cognate cognate = cognateMap.get(entry.MultistateCode);
                    if (cognate == null) {
                        cognate = new Cognate();
                        cognate.GlossID = entry.GlossID;
                        cognate.MultistateCode = entry.MultistateCode;
                        cognateMap.put(entry.MultistateCode, cognate);
                    }
                    cognate.languages.add(entry.Language);
                    cognate.word.add(entry.Word);
                }
            }

            /**
             * Reads the cognate label file to learn what every matrix column
             * stands for, then scans the nexus matrix and emits one Entry for
             * every '1' found in a language's row.
             *
             * @return the entries in the order they were encountered
             * @throws Exception if a file cannot be read or the nexus file
             *                   contains no line matching "matrix"
             */
            public List<Entry> readCognates() throws Exception {
                // lookup tables indexed by matrix column
                List<String> mapPositionToCognate = new ArrayList<>();
                List<String> mapPositionToGloss = new ArrayList<>();
                List<Integer> mapPositionToGlossID = new ArrayList<>();
                List<Integer> mapPositionToState = new ArrayList<>();

                try (BufferedReader fin = new BufferedReader(new FileReader(new File(cognateFile)))) {
                    int k = 0;      // state counter, restarted at every "..._group," line
                    int gloss = 0;  // gloss id, incremented at every "..._group," line
                    String str;
                    while ((str = fin.readLine()) != null) {
                        boolean startsGroup = str.matches(".*_group,");
                        String label = str.replaceAll("\\s+\\d+\\s", "");
                        if (startsGroup) {
                            k = 0;
                            gloss++;
                        }
                        mapPositionToCognate.add(label);
                        mapPositionToGloss.add(startsGroup ? "groupcode" : label.replaceAll("_.*", ""));
                        mapPositionToGlossID.add(gloss);
                        mapPositionToState.add(k++);
                    }
                }

                List<Entry> entries = new ArrayList<>();
                try (BufferedReader fin = new BufferedReader(new FileReader(new File(nexusFile)))) {
                    String sStr;
                    // eat up header
                    do {
                        sStr = fin.readLine();
                    } while (sStr != null && !sStr.matches(".*matrix.*"));
                    if (sStr == null) {
                        throw new java.io.IOException("No line containing 'matrix' found in " + nexusFile);
                    }
                    // skip the lines after it that contain a '['
                    do {
                        sStr = fin.readLine();
                    } while (sStr != null && sStr.matches(".*\\[.*"));

                    // process data until a lone ';' or end of file
                    while (sStr != null && !sStr.trim().equals(";")) {
                        String[] strs = sStr.trim().split("\\s+");
                        if (strs.length >= 2) {
                            String lang = strs[0].replaceAll("'", "");
                            String cognates = strs[1].trim();
                            for (int i = 0; i < cognates.length(); i++) {
                                if (cognates.charAt(i) == '1') {
                                    Entry entry = new Entry();
                                    entry.GlossID = mapPositionToGlossID.get(i);
                                    entry.Gloss = mapPositionToGloss.get(i);
                                    entry.Subgroup = "x";
                                    entry.Language = lang;
                                    entry.Word = mapPositionToCognate.get(i);
                                    entry.MultistateCode = mapPositionToState.get(i);
                                    entries.add(entry);
                                }
                            }
                        }
                        sStr = fin.readLine();
                    }
                }
                return entries;
            }

        };
        data.loadCognateData(nexusFile);
        data.calcSpanningTrees(locations);
        //CognateIO.writeCognatesToNexus(new File(CognateIO.NEXUSFILE), data.cognateGlossMap, locations);


        edgecount = new int[CognateIO.NTAX][CognateIO.NTAX];
        languages = new ArrayList<>();
        Map<String, Integer> langMap = new HashMap<>();

        // a local counter keeps the GlossID field untouched while counting
        for (int glossId = 1; glossId < CognateIO.NGLOSSIDS; glossId++) {
            Map<Integer, Cognate> map = data.getCognates(glossId);
            if (map == null) {
                continue;
            }
            for (Cognate c : map.values()) {
                // consecutive pairs of indices into c.languages
                List<Integer> edges = c.edges;
                for (int i = 0; i < edges.size(); i += 2) {
                    int i0 = languageIndex(langMap, c.languages.get(edges.get(i)));
                    int i1 = languageIndex(langMap, c.languages.get(edges.get(i + 1)));
                    // only the upper triangle is used
                    edgecount[Math.min(i0, i1)][Math.max(i0, i1)]++;
                }
            }
        }
        GlossID = 1;

    }

    /** Returns the index of a language in the languages list, appending it on first sight. */
    private int languageIndex(Map<String, Integer> langMap, String language) {
        Integer index = langMap.get(language);
        if (index == null) {
            index = languages.size();
            langMap.put(language, index);
            languages.add(language);
        }
        return index;
    }


    /**
     * Key bindings: 'p' steps to the previous GlossID (not below 1), 'n' to
     * the next one (not above CognateIO.NGLOSSIDS), and 'f' writes the
     * current view to /tmp/x.pdf. The panel is repainted on every key.
     */
    @Override
    public void keyTyped(KeyEvent e) {
        char key = e.getKeyChar();
        if (key == 'p') {
            GlossID = Math.max(1, GlossID - 1);
        }
        if (key == 'n') {
            GlossID = Math.min(CognateIO.NGLOSSIDS, GlossID + 1);
        }
        repaint();

        if (key == 'f') {
            com.itextpdf.text.Document doc = new com.itextpdf.text.Document();
            try (FileOutputStream out = new FileOutputStream("/tmp/x.pdf")) {
                PdfContentByte cb = openPdf(doc, out);
                paintToPdf(cb);
                doc.close();
            } catch (Exception ex) {
                ex.printStackTrace();
            }
        }
    }

    /** Attaches a PDF writer to the stream, sizes the page like this panel and opens the document. */
    private PdfContentByte openPdf(com.itextpdf.text.Document doc, FileOutputStream out) throws Exception {
        PdfWriter writer = PdfWriter.getInstance(doc, out);
        doc.setPageSize(new com.itextpdf.text.Rectangle(getWidth(), getHeight()));
        doc.open();
        return writer.getDirectContent();
    }

    /** Paints this panel for the current GlossID into the PDF content, disposing the graphics exactly once. */
    private void paintToPdf(PdfContentByte cb) {
        Graphics2D g2d = new PdfGraphics2D(cb, getWidth(), getHeight());
        try {
            paint(g2d);
        } finally {
            g2d.dispose();
        }
    }

    /**
     * Writes one page per GlossID in [1, CognateIO.NGLOSSIDS) to
     * /tmp/&lt;base name of CognateIO.KML_FILE&gt;.pdf.
     */
    private static void exportAllGlossesToPdf(Panel pane) throws Exception {
        String label = CognateIO.KML_FILE;
        label = label.substring(label.lastIndexOf('/') + 1, label.lastIndexOf('.'));

        com.itextpdf.text.Document doc = new com.itextpdf.text.Document();
        try (FileOutputStream out = new FileOutputStream("/tmp/" + label + ".pdf")) {
            PdfContentByte cb = pane.openPdf(doc, out);
            for (int i = 1; i < CognateIO.NGLOSSIDS; i++) {
                pane.GlossID = i;
                pane.paintToPdf(cb);
                doc.newPage();
                System.err.println("page " + i);
            }
            doc.close();
        }
    }

    /** Not used; present only to satisfy the key listener interface. */
    @Override
    public void keyPressed(KeyEvent e) {
        // intentionally empty
    }

    /** Not used; present only to satisfy the key listener interface. */
    @Override
    public void keyReleased(KeyEvent e) {
        // intentionally empty
    }

    /**
     * Opens a 1024x728 frame showing a Panel loaded from the hard-coded
     * NEXUS_FILE, COGNATE_FILE and BG_FILE paths.
     *
     * @param args passed on to the Panel constructor
     */
    public static void main(String[] args) throws Exception {
        JFrame frame = new JFrame();
        frame.setSize(1024, 728);
        Panel pane = new Panel(args);
        pane.loadLocations();
        pane.loadData(NEXUS_FILE, COGNATE_FILE);
        pane.loadBGImage(BG_FILE);
        frame.add(pane);
        frame.addKeyListener(pane);
        frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
        frame.setVisible(true);

        // batch PDF export of every gloss is switched off; change the condition to enable it
        if (false) {
            try {
                exportAllGlossesToPdf(pane);
                System.exit(0);
            } catch (Exception ex) {
                ex.printStackTrace();
            }
        }
    }

}
diff --git a/templates/BinaryCTMC.xml b/templates/BinaryCTMC.xml new file mode 100755 index 0000000..8dc9eba --- /dev/null +++
b/templates/BinaryCTMC.xml @@ -0,0 +1,363 @@ + + +beast.app.beauti.BeautiConnector +beast.app.beauti.BeautiSubTemplate +beast.math.distributions.Prior + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +]]> + + + + + + + + + + + + + + + + + + + + Scales proportion of invariant sites parameter of partition $(n) + Scales mutation rate of partition s:$(n) + Scales gamma shape parameter of partition s:$(n) + Scale clock rate of partition c:$(n) + Scales birth rate of Yule prior for partition t:$(n) + Exchange values of frequencies of partition s:$(n) + + Estimates tip dates for tree t:$(n) + + Scales all internal nodes for tree t:$(n) + Scales root node for tree t:$(n) + Draws new internal node heights uniformally for tree t:$(n) + Performs subtree slide rearrangement of tree t:$(n) + Narrow exchange performs local rearrangement of tree t:$(n) + Wide exchange performs global rearrangement of tree t:$(n) + Performs Wilson-Balding global rearrangement of tree t:$(n) + Scale up substitution rate c:$(n) and scale down tree t:($n) + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +]]> + + + + + + +]]> + + + + + +]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/templates/BinaryCovarion.xml b/templates/BinaryCovarion.xml new file mode 100755 index 0000000..78cc12a --- /dev/null +++ b/templates/BinaryCovarion.xml @@ -0,0 +1,395 @@ + + +beast.app.beauti.BeautiConnector +beast.app.beauti.BeautiSubTemplate +beast.math.distributions.Prior + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +]]> + + + + + + + + + + + + + + + + + + + + + + + Scales 
proportion of invariant sites parameter of partition $(n) + Scales mutation rate of partition s:$(n) + Scales mutation rate of partition s:$(n) + Scales mutation rate of partition s:$(n) + Scales gamma shape parameter of partition s:$(n) + Scale clock rate of partition c:$(n) + Scales birth rate of Yule prior for partition t:$(n) + Scales birth rate of Yule prior for partition t:$(n) + + Estimates frequencies for binary covarion model + Estimates hidden frequencies for binary covarion model + + Scales all internal nodes for tree t:$(n) + Scales root node for tree t:$(n) + Draws new internal node heights uniformally for tree t:$(n) + Performs subtree slide rearrangement of tree t:$(n) + Narrow exchange performs local rearrangement of tree t:$(n) + Wide exchange performs global rearrangement of tree t:$(n) + Performs Wilson-Balding global rearrangement of tree t:$(n) + Scale up substitution rate c:$(n) and scale down tree t:($n) + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +]]> + + + + + + +]]> + + + + + +]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/templates/SDollo.xml b/templates/SDollo.xml new file mode 100755 index 0000000..6e042b7 --- /dev/null +++ b/templates/SDollo.xml @@ -0,0 +1,352 @@ + + +beast.app.beauti.BeautiConnector +beast.app.beauti.BeautiSubTemplate +beast.math.distributions.Prior + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +]]> + + + + + + + + + + + + + + + + + + Scales proportion of invariant sites parameter of partition $(n) + Scales mutation rate of partition s:$(n) + Scales gamma shape parameter of partition s:$(n) + Scale clock rate of partition c:$(n) + Scales birth rate of Yule prior for partition t:$(n) + + Estimates tip dates for tree t:$(n) + + Scales all internal nodes for 
tree t:$(n) + Scales root node for tree t:$(n) + Draws new internal node heights uniformally for tree t:$(n) + Performs subtree slide rearrangement of tree t:$(n) + Narrow exchange performs local rearrangement of tree t:$(n) + Wide exchange performs global rearrangement of tree t:$(n) + Performs Wilson-Balding global rearrangement of tree t:$(n) + Scale up substitution rate c:$(n) and scale down tree t:($n) + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +]]> + + + + + + +]]> + + + + + +]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/version.xml b/version.xml new file mode 100644 index 0000000..e9c5152 --- /dev/null +++ b/version.xml @@ -0,0 +1,3 @@ + + +