开发者

Groovy unicode support for MySQL select

I am trying to fix a problem in a UTF-8 encoded database. During the migration to the current database system it looks like the data was compressed to Latin-1 encoding. I wrote a quick Groovy script to find and report the encoding errors. But, it fails to match anything.

If I write the analogous code in Java the report works fine!

So, I presume as a rookie with Groovy I'm doing something stupid. Can anyone see the error in my approach? I've tried hard-coding the characters in the select statement but i cannot get any of the special characters to work.

Thanks of course in advance.

aH

Groovy Code

import groovy.sql.Sql

        def dias = ["è": "é", "…": "…"]

        def atk = Sql.newInstance(
            'jdbc:mysql://myURL:3306/myDB?useUnicode=true&characterEncoding=utf8',
            'user',
            'pass',
            'com.mysql.jdbc.Driver'
        )

        dias.each() {x ->
            println x.getKey()
            atk.eachRow("SELECT `noteContent` FROM `ArchDescriptionRepeatingData`"
            + "WHERE `noteContent` LIKE(\"%" + x.getKey() + "%\")"
            ) { y ->
                println y
            }
        }

as requested here is the java code, which I added some extra functionality to:

import java.sql.*;
import java.util.*;
import java.util.regex.*;

    public class Diacritic {
        private Connection connection;
        private HashMap<String, String> dia;

        Diacritic() throws SQLException, ClassNotFoundException {
            Class.forName("com.mysql.jdbc.Driver");
            connection = DriverManager.getConnection(
                "jdbc:mysql://myURL:3306/myDB"
                + "?user=user&password=pass");
            dia = new HashMap<String, String>();

            populateArray();

            System.out.println("Component Titles");

            for(Map.Entry e : dia.entrySet()) {
                String st = e.getKey().toString();
                Pattern p = Pattern.compile(st);
                System.out.println();
                System.out.println(st + "\t\t->\t" + e.getValue().toString());
                System.out.println();
                Statement s = connection.createStatement();
                ResultSet r = s.executeQuery("SELECT `itemValue` FROM `ListOrderedItems`"
                    + "WHERE `itemValue` LIKE(\"%" + st + "%\")"
                    //+ "LIMIT 0, 100000"
                );

                int x = 0;

                while (r.next()) {
                    x++;
                    Matcher m = p.matcher(r.getString(1));
                    if(m.find()) {

                    int start = 0;
                开发者_JS百科    int mstart = m.start();
                    int mend = m.end();
                    int end = r.getString(1).length();
                    if (mstart - start <= 10 || end - mend <= 10) {
                        System.out.println(x + ": " + r.getString(1)
                            + "\t->\t"
                            + "\t"  + r.getString(1).substring(0,mstart)
                            + e.getValue().toString()
                            + r.getString(1).substring(mend)
                            );

                    }
                    else {
                        System.out.println(x + ": " + r.getString(1).substring((mstart - 10), (mend + 10))
                            + "\t->\t"
                            + "\t"  + r.getString(1).substring((mstart - 10),mstart)
                            + e.getValue().toString()
                            + r.getString(1).substring(mend, (mend + 10))
                        );
                    }

                    }
                }

                s.close();
            }


        }

        public static void main (String[] args) throws SQLException, ClassNotFoundException {
            Diacritic d = new Diacritic();
        }

        public void populateArray() {
            dia.put("è", "é");
            dia.put("…", "…");
            dia.put("—", "");
            dia.put("‑”", "-");
            dia.put("é", "é");
            dia.put("æ", "æ");
            dia.put("ë", "ë");
            //etc...
        }   
    }
0

上一篇:

下一篇:

精彩评论

暂无评论...
验证码 换一张
取 消

最新问答

问答排行榜