/*
    WordNet Access Library
    Copyright (C) 2006 Pavel Simakov
    http://www.softwaresecretweapons.com

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
    License as published by the Free Software Foundation; either
    version 2.1 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
    Lesser General Public License for more details.

    You should have received a copy of the GNU Lesser General Public
    License along with this library; if not, write to the Free Software
    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*/
package com.oy.shared.wn.prolog;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.sql.SQLException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import com.oy.shared.lw.misc.ITrace;
import com.oy.shared.wn.db.IDataAccessRWConnection;

/**
 * Loads Prolog fact files ("wn_*.pl") into database tables.
 *
 * {@link #rebuild} drops and recreates the WN_20 database, walks the given
 * path recursively, and for every recognised file creates one table and
 * inserts one row per line of the file.
 */
public class WNPrologRuleLoader {

    /** Callback invoked once per line read from a file. */
    interface ILineProcesor {
        public void processLine(String line);
    }

    /**
     * Line processor that extracts a fixed number of capture groups from each
     * line using a template translated by {@link #escapeRegex(String)} and
     * hands them to {@link #processRecord(String[])}.
     */
    abstract class RegexLineProcessor implements ILineProcesor {

        /** Translated regular expression text; kept for error messages. */
        private final String patt;

        /** Compiled once here instead of once per processed line. */
        private final Pattern compiled;

        /** Number of capture groups copied out of every match. */
        private final int groups;

        /**
         * @param regex  template using '%' for a run of digits and '*' for any text
         * @param groups number of capture groups the template produces
         */
        public RegexLineProcessor(String regex, int groups){
            this.groups = groups;
            patt = escapeRegex(regex);
            compiled = Pattern.compile(patt);
        }

        /**
         * Matches the line against the template and forwards the captured values.
         *
         * @throws RuntimeException if the line does not contain the pattern, or
         *         wrapping whatever {@link #processRecord(String[])} threw
         */
        public void processLine(String line){
            Matcher mat = compiled.matcher(line);
            if (!mat.find()){
                throw new RuntimeException("Error locating pattern " + patt + " in " + line);
            }

            String [] values = new String[groups];
            for (int i=0; i < values.length; i++){
                // group 0 is the whole match; captured values start at 1
                values[i] = mat.group(i + 1);
            }

            try {
                processRecord(values);
            } catch(Throwable e){
                // render the array contents; concatenating the array itself
                // would only log its identity hash
                trace.error("Failed to process record " + java.util.Arrays.asList(values), e);
                throw new RuntimeException(e);
            }
        }

        /** Receives the captured values of one successfully matched line. */
        protected abstract void processRecord(String [] values);
    }

    /** Describes how one Prolog file maps onto one database table. */
    private static final class TableSpec {
        final String table;
        final String columns;
        final String template;
        final int groups;

        TableSpec(String table, String columns, String template, int groups){
            this.table = table;
            this.columns = columns;
            this.template = template;
            this.groups = groups;
        }
    }

    /** File name suffix of the files that are considered for loading. */
    private static final String EXT = "pl";

    private static final String NUM6 =
        "(" +
        " synset_id decimal(10), " +
        " w_num decimal(10), " +
        " word varchar(255), " +
        " ss_type varchar(5), " +
        " sense_number decimal(10), " +
        " tag_count decimal(10)," +
        " index synset_id_idx (synset_id)," +
        " index word_idx (word)" +
        ")";

    private static final String NUM4 =
        "(" +
        " synset_id_1 decimal(10), " +
        " w_num_1 decimal(10), " +
        " synset_id_2 decimal(10)," +
        " w_num_2 decimal(10)," +
        " index synset_id_1_idx (synset_id_1)," +
        " index synset_id_2_idx (synset_id_2)" +
        ")";

    private static final String NUM3 =
        "(" +
        " synset_id_1 decimal(10), " +
        " synset_id_2 decimal(10)," +
        " class_type varchar(255)," +
        " index synset_id_1_idx (synset_id_1)," +
        " index synset_id_2_idx (synset_id_2)" +
        ")";

    private static final String NUM3V =
        "(" +
        " synset_id decimal(10), " +
        " f_num decimal(10)," +
        " w_num decimal(10)," +
        " index synset_id_idx (synset_id)" +
        ")";

    private static final String NUM2 =
        "(" +
        " synset_id_1 decimal(10), " +
        " synset_id_2 decimal(10)," +
        " index synset_id_1_idx (synset_id_1)," +
        " index synset_id_2_idx (synset_id_2)" +
        ")";

    // NOTE(review): gloss is capped at 255 characters; what happens to longer
    // values depends on the database configuration - confirm.
    private static final String NUM2G =
        "(" +
        " synset_id decimal(10), " +
        " gloss varchar(255)," +
        " index synset_id_idx (synset_id)," +
        " index gloss_idx (gloss)" +
        ")";

    /** One entry per supported file; the file name is the table name plus ".pl". */
    private static final TableSpec[] TABLES = {
        new TableSpec("wn_ant", NUM4,  "ant(%,%,%,%).", 4),
        new TableSpec("wn_at",  NUM2,  "at(%,%).", 2),
        new TableSpec("wn_cls", NUM3,  "cls(%,%,*).", 3),
        new TableSpec("wn_cs",  NUM2,  "cs(%,%).", 2),
        new TableSpec("wn_der", NUM4,  "der(%,%,%,%).", 4),
        new TableSpec("wn_ent", NUM2,  "ent(%,%).", 2),
        new TableSpec("wn_fr",  NUM3V, "fr(%,%,%).", 3),
        new TableSpec("wn_g",   NUM2G, "g(%,'*').", 2),
        new TableSpec("wn_hyp", NUM2,  "hyp(%,%).", 2),
        new TableSpec("wn_mm",  NUM2,  "mm(%,%).", 2),
        new TableSpec("wn_mp",  NUM2,  "mp(%,%).", 2),
        new TableSpec("wn_ms",  NUM2,  "ms(%,%).", 2),
        new TableSpec("wn_per", NUM4,  "per(%,%,%,%).", 4),
        new TableSpec("wn_ppl", NUM4,  "ppl(%,%,%,%).", 4),
        new TableSpec("wn_s",   NUM6,  "s(%,%,'*',*,%,%).", 6),
        new TableSpec("wn_sa",  NUM4,  "sa(%,%,%,%).", 4),
        new TableSpec("wn_sim", NUM2,  "sim(%,%).", 2),
        new TableSpec("wn_vgp", NUM4,  "vgp(%,%,%,%).", 4)
    };

    private IDataAccessRWConnection conn;
    private int skip;
    private int done;
    private ITrace trace;

    /**
     * Recreates the WN_20 database and loads every recognised file found
     * under the given path.
     *
     * @param trace    receives progress and error messages
     * @param conn     connection used for all statements
     * @param filePath file or folder to load; folders are walked recursively
     * @throws SQLException if a statement fails
     * @throws IOException  if a file or folder cannot be read
     * @throws IllegalArgumentException if the path does not exist
     */
    public void rebuild(ITrace trace, IDataAccessRWConnection conn, String filePath) throws SQLException, IOException {
        this.conn = conn;
        this.trace = trace;

        skip = 0;
        done = 0;

        initDatabase();

        trace.info("+ begin " + filePath);
        processFolder(
            new java.io.File(filePath), filePath, EXT
        );
        trace.info("+ end " + filePath + ", done " + (done) + ", skip " + skip);
    }

    /** Drops any existing WN_20 database, creates a fresh one and selects it. */
    private void initDatabase() throws SQLException {
        conn.execUpdate("USE MYSQL;");
        conn.execUpdate("DROP DATABASE IF EXISTS WN_20;");
        conn.execUpdate("CREATE DATABASE WN_20;");
        conn.execUpdate("USE WN_20;");
    }

    /**
     * Recursively visits a file or folder. Files whose name ends with the
     * given suffix are passed to {@link #processFile}; the done/skip counters
     * record whether each of them was recognised.
     *
     * @throws IllegalArgumentException if the path does not exist
     * @throws IOException if a folder's content cannot be listed
     */
    private void processFolder(File file, String fileName, String tmplFileNameExt) throws SQLException, IOException {
        if (!file.exists()){
            throw new IllegalArgumentException("Folder does not exist " + file.getAbsolutePath());
        }

        if (file.isDirectory()) {
            String[] children = file.list();
            if (children == null){
                // File.list() returns null when the directory cannot be read
                throw new IOException("Unable to list folder " + file.getAbsolutePath());
            }
            for (int i=0; i < children.length; i++){
                processFolder(new java.io.File(file, children[i]), children[i], tmplFileNameExt);
            }
            return;
        }

        if (fileName.endsWith(tmplFileNameExt)) {
            if (processFile(file, fileName)) {
                done++;
            } else {
                skip++;
            }
        }
    }

    /**
     * Translates a line template into a regular expression.
     * The characters . , ( ) ' are escaped to match literally, '*' becomes a
     * capture group for any text, '%' becomes a capture group for a run of
     * digits, and every other character is copied unchanged.
     */
    private static String escapeRegex(String pat){
        StringBuffer sb = new StringBuffer();
        for (int i=0; i < pat.length() ; i++){
            char c = pat.charAt(i);
            switch (c) {
                case '.':
                case ',':
                case '(':
                case ')':
                case '\'':
                    sb.append("\\" + c);
                    break;
                case '*':
                    sb.append("(.*)");
                    break;
                case '%':
                    sb.append("(\\d*)");
                    break;
                default:
                    sb.append(c);
                    break;
            }
        }
        return sb.toString();
    }

    /** Builds a comma separated list of {@code count} statement placeholders. */
    private String qa(int count){
        StringBuffer sb = new StringBuffer();
        for (int i=0; i < count; i++){
            if (i != 0){
                sb.append(", ");
            }
            sb.append("?");
        }
        return sb.toString();
    }

    /**
     * Inserts one row with the given values into the named table.
     *
     * @throws RuntimeException wrapping any failure of the statement
     */
    private void insert(String tableName, String [] values){
        try {
            conn.execUpdate("insert into " + tableName + " values (" + qa(values.length) + ")", values);
        } catch(Exception e){
            throw new RuntimeException(e);
        }
    }

    /**
     * Loads the file if its name matches one of the supported tables.
     *
     * @return true if the file was recognised and loaded, false otherwise
     */
    private boolean processFile(File file, String fileName) throws SQLException, IOException {
        for (int i=0; i < TABLES.length; i++){
            TableSpec spec = TABLES[i];
            if ((spec.table + "." + EXT).equalsIgnoreCase(fileName)){
                loadTable(file, fileName, spec.table, spec.columns, spec.template, spec.groups);
                return true;
            }
        }
        return false;
    }

    /** Creates the table and inserts one row for every line of the file. */
    private void loadTable(File file, String fileName, final String tableName, String columns, String template, int groups) throws SQLException, IOException {
        conn.execUpdate("create table " + tableName + " " + columns);

        RegexLineProcessor rlp = new RegexLineProcessor(template, groups) {
            protected void processRecord(String [] values){
                insert(tableName, values);
            }
        };

        processLineFile(file, fileName, rlp);
    }

    /**
     * Feeds every line of the file to the processor and traces the line count.
     * The reader is closed even when the processor throws.
     */
    private void processLineFile(File file, String fileName, ILineProcesor processor) throws IOException {
        trace.info("> begin " + fileName);

        int count = 0;

        // load file
        BufferedReader in = new BufferedReader(
            new FileReader(file));
        try {
            String line;
            while ((line = in.readLine()) != null) {
                processor.processLine(line);
                count++;
            }
        } finally {
            in.close();
        }

        trace.info("> end " + fileName + ", " + count + " items");
    }

}