UTF-8轉成GBK 之多種方法
阿新 • • 發佈:2018-11-02
/** * <p>Description:獲取字串後的第二個?後的字串</p> * @author sunshaofeng * @date 2018-9-18 15:45 * @return * @version 1.0 */ private static String getStr(String str, int n) { try { int i = 0; int s = 0; String ss=""; //迴圈遍歷 while (i++ < n) { s = str.indexOf("?", s + 1); if (s == -1) { return str; } } ss=new String(str.substring(s+2)); //獲取編碼方式 String encoding = getEncoding(ss); logger.info("encoding :"+encoding); logger.info("ENCODE BEFORE :"+ss); //方法一 try { String gbkStyle = gbk2Utf(ss); logger.info("gbk2Utf encodeToGBK ONE:"+gbkStyle); } catch (Exception e) { e.printStackTrace(); } //方法二 try { String gbkStyle2 = new String(getUTF8BytesFromGBKString(ss), "UTF-8"); logger.info("gbk2Utf encodeToGBK TWO:"+gbkStyle2); } catch (Exception e) { e.printStackTrace(); } //方法三 try { String gbkStyle3 = charsetConvert(ss,"UTF-8"); logger.info("gbk2Utf encodeToGBK THREE:"+gbkStyle3); } catch (Exception e) { e.printStackTrace(); } //方法四 try { String gbkStyle41 = gbkToUnicode(ss); String gbkStyle42 = unicodeToUtf8(gbkStyle41); logger.info("gbk2Utf encodeToGBK FOUR:"+gbkStyle42); String encoding1 = getEncoding(gbkStyle42); logger.info("encoding After:"+encoding1); } catch (Exception e) { e.printStackTrace(); } //方法五 try { String gbkStyle5=new String(ss.getBytes("GB2312"),"UTF-8");//轉換後的結果 logger.info("gbk2Utf encodeToGBK FIVE:"+gbkStyle5); } catch (Exception e) { e.printStackTrace(); } //方法六 try { String gbkStyle6 = URLEncoder.encode (ss, "UTF-8" ); logger.info("gbk2Utf encodeToGBK 6:"+gbkStyle6); } catch (Exception e) { e.printStackTrace(); } return ss; } catch (Exception e) { e.printStackTrace(); } return null; } /** * GBK轉UTF-8 方式一 */ private static String gbk2Utf(String gbk) throws UnsupportedEncodingException { char[] c = gbk.toCharArray(); byte[] fullByte = new byte[3*c.length]; for (int i=0; i<c.length; i++) { String binary = Integer.toBinaryString(c[i]); StringBuffer sb = new StringBuffer(); int len = 16 - binary.length(); //前面補零 for(int j=0; j<len; j++){ sb.append("0"); } 
sb.append(binary); //增加位,達到到24位3個位元組 sb.insert(0, "1110"); sb.insert(8, "10"); sb.insert(16, "10"); fullByte[i*3] = Integer.valueOf(sb.substring(0, 8), 2).byteValue();//二進位制字串建立整型 fullByte[i*3+1] = Integer.valueOf(sb.substring(8, 16), 2).byteValue(); fullByte[i*3+2] = Integer.valueOf(sb.substring(16, 24), 2).byteValue(); } //模擬UTF-8編碼的網站顯示 return(new String(fullByte,"UTF-8")); } /** * GBK轉 UTF-8方式二: */ public static byte[] getUTF8BytesFromGBKString(String gbkStr) { int n = gbkStr.length(); byte[] utfBytes = new byte[3 * n]; int k = 0; for (int i = 0; i < n; i++) { int m = gbkStr.charAt(i); if (m < 128 && m >= 0) { utfBytes[k++] = (byte) m; continue; } utfBytes[k++] = (byte) (0xe0 | (m >> 12)); utfBytes[k++] = (byte) (0x80 | ((m >> 6) & 0x3f)); utfBytes[k++] = (byte) (0x80 | (m & 0x3f)); } if (k < utfBytes.length) { byte[] tmp = new byte[k]; System.arraycopy(utfBytes, 0, tmp, 0, k); return tmp; } return utfBytes; } /** * GBK轉 UTF-8 方式三 */ private static String charsetConvert(String str, String charset) { try { str = new sun.misc.BASE64Encoder().encode(str.getBytes(charset)); byte[] bytes = new sun.misc.BASE64Decoder().decodeBuffer(str); str = new String(bytes, charset); } catch(IOException e) { e.printStackTrace(); } return str; } /** * 獲取字串的編碼方式 * @param str * @return */ public static String getEncoding(String str) { String encode = "GB2312"; try { if (str.equals(new String(str.getBytes(encode), encode))) { String s = encode; return s; } } catch (Exception exception) { } encode = "ISO-8859-1"; try { if (str.equals(new String(str.getBytes(encode), encode))) { String s1 = encode; return s1; } } catch (Exception exception1) { } encode = "UTF-8"; try { if (str.equals(new String(str.getBytes(encode), encode))) { String s2 = encode; return s2; } } catch (Exception exception2) { } encode = "GBK"; try { if (str.equals(new String(str.getBytes(encode), encode))) { String s3 = encode; return s3; } } catch (Exception exception3) { } return ""; } /** * gbk轉unicode * @param str 
* @return */ public static String gbkToUnicode(String str) { StringBuffer result = new StringBuffer(); for (int i = 0; i < str.length(); i++) { char chr1 = (char) str.charAt(i); if ((chr1 & (0x00FF)) == chr1) { result.append(chr1); continue; } result.append("\\u" + Integer.toHexString((int) chr1)); } return result.toString(); } /** * unicode轉utf-8 * @param theString * @return */ public static String unicodeToUtf8(String theString) { char aChar; int len = theString.length(); StringBuffer outBuffer = new StringBuffer(len); for (int x = 0; x < len;) { aChar = theString.charAt(x++); if (aChar == '\\') { aChar = theString.charAt(x++); if (aChar == 'u') { int value = 0; for (int i = 0; i < 4; i++) { aChar = theString.charAt(x++); switch (aChar) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': value = (value << 4) + aChar - '0'; break; case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': value = (value << 4) + 10 + aChar - 'a'; break; case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': value = (value << 4) + 10 + aChar - 'A'; break; default: throw new IllegalArgumentException( "Malformed \\uxxxx encoding."); } } outBuffer.append((char) value); } else { if (aChar == 't') aChar = '\t'; else if (aChar == 'r') aChar = '\r'; else if (aChar == 'n') aChar = '\n'; else if (aChar == 'f') aChar = '\f'; outBuffer.append(aChar); } } else outBuffer.append(aChar); } return outBuffer.toString(); }