Develop/PHP
[PHP] 한글 자음모음 분리 함수
시크라기
2012. 4. 27. 10:00
/**
 * Encode a string as decimal HTML numeric entities.
 *
 * Only code points in the range U+0080..U+FFFF are converted (for example
 * "가" becomes "&#44032;"); everything outside that range is returned as-is.
 *
 * @param string $char UTF-8 text, normally a single character.
 * @return string The input with in-range characters replaced by "&#NNNN;".
 */
function utfCharToNumber($char)
{
    // Conversion map: start code, end code, offset, mask.
    $convmap = array(0x80, 0xffff, 0, 0xffff);

    return mb_encode_numericentity($char, $convmap, 'UTF-8');
}

/**
 * Split a UTF-8 string into an array of single characters.
 *
 * @param string $str UTF-8 text.
 * @return array List of one-character strings, in original order.
 */
function strToArray($str)
{
    $result = array();
    $length = mb_strlen($str, 'UTF-8');

    for ($idx = 0; $idx < $length; $idx++) {
        $result[] = mb_substr($str, $idx, 1, 'UTF-8');
    }

    return $result;
}

/**
 * Extract the first run of decimal digits from a value.
 *
 * @param mixed $string Value to scan; it is matched as a string.
 * @return string|int The first digit run as a string, or integer 0 when the
 *                    value contains no digits.
 */
function parseInt($string)
{
    if (preg_match('/(\d+)/', $string, $matches)) {
        return $matches[1];
    }

    return 0;
}

// PHP 7.2+ already provides mb_chr(); declaring it again would be a fatal
// "Cannot redeclare" error, so the fallback is only defined when missing.
if (!function_exists('mb_chr')) {
    /**
     * Fallback for mb_chr(): build the UTF-8 byte sequence of a code point.
     *
     * @param int         $ord      Unicode code point.
     * @param string|null $encoding Accepted for signature compatibility with
     *                              the built-in; this fallback always emits UTF-8.
     * @return string The UTF-8 encoded character.
     */
    function mb_chr($ord, $encoding = null)
    {
        // Single-byte (ASCII) code points map directly.
        if ($ord < 128) {
            return chr($ord);
        }

        // Count the continuation bytes needed: 1 below 0x800, 2 below 0x10000, ...
        $tail = 1;
        while ($tail < 6 && $ord >= (1 << (5 * $tail + 6))) {
            $tail++;
        }

        // Lead byte: length marker bits followed by the top payload bits.
        $chr = chr(($ord >> ($tail * 6)) + 256 - (1 << (7 - $tail)));

        // Continuation bytes: 10xxxxxx, six payload bits each, high to low.
        for ($i = $tail - 1; $i >= 0; $i--) {
            $chr .= chr((63 & ($ord >> ($i * 6))) + 128);
        }

        return $chr;
    }
}

/**
 * Decompose the Hangul syllables of a string into their jamo.
 *
 * Each precomposed syllable (U+AC00..U+D7A3) contributes one element for its
 * initial consonant, one for its vowel and, when present, one for its final
 * consonant. Compound vowels and compound finals are emitted as a single
 * two-jamo string (for example 'ㅗㅏ' or 'ㅂㅅ'). Every other character is
 * copied to the result unchanged.
 *
 * @param string $str UTF-8 text.
 * @return array List of strings: jamo for Hangul syllables, the original
 *               character otherwise.
 */
function make_jamo($str)
{
    // Initial consonants (19): ㄱ ㄲ ㄴ ㄷ ㄸ ㄹ ㅁ ㅂ ㅃ ㅅ ㅆ ㅇ ㅈ ㅉ ㅊ ㅋ ㅌ ㅍ ㅎ
    $ChoSeong = array(
        0x3131, 0x3132, 0x3134, 0x3137, 0x3138,
        0x3139, 0x3141, 0x3142, 0x3143, 0x3145, 0x3146, 0x3147, 0x3148,
        0x3149, 0x314a, 0x314b, 0x314c, 0x314d, 0x314e,
    );

    // Vowels (21): ㅏ ㅐ ㅑ ㅒ ㅓ ㅔ ㅕ ㅖ ㅗ ㅘ(9) ㅙ(10) ㅚ(11) ㅛ ㅜ ㅝ(14) ㅞ(15) ㅟ(16) ㅠ ㅡ ㅢ(19) ㅣ
    $JungSeong = array(
        0x314f, 0x3150, 0x3151, 0x3152, 0x3153,
        0x3154, 0x3155, 0x3156, 0x3157, 0x3158, 0x3159, 0x315a, 0x315b,
        0x315c, 0x315d, 0x315e, 0x315f, 0x3160, 0x3161, 0x3162, 0x3163,
    );

    // Final consonants (28): <none> ㄱ ㄲ ㄳ(3) ㄴ ㄵ(5) ㄶ(6) ㄷ ㄹ ㄺ(9) ㄻ(10) ㄼ(11) ㄽ(12)
    // ㄾ(13) ㄿ(14) ㅀ(15) ㅁ ㅂ ㅄ(18) ㅅ ㅆ ㅇ ㅈ ㅊ ㅋ ㅌ ㅍ ㅎ
    $JongSeong = array(
        0x0000, 0x3131, 0x3132, 0x3133, 0x3134,
        0x3135, 0x3136, 0x3137, 0x3139, 0x313a, 0x313b, 0x313c, 0x313d,
        0x313e, 0x313f, 0x3140, 0x3141, 0x3142, 0x3144, 0x3145, 0x3146,
        0x3147, 0x3148, 0x314a, 0x314b, 0x314c, 0x314d, 0x314e,
    );

    // Compound vowels, keyed by vowel index, split into their two parts.
    $compoundVowels = array(
        9  => 'ㅗㅏ',
        10 => 'ㅗㅐ',
        11 => 'ㅗㅣ',
        14 => 'ㅜㅓ',
        15 => 'ㅜㅔ',
        16 => 'ㅜㅣ',
        19 => 'ㅡㅣ',
    );

    // Compound final consonants, keyed by final index, split into their two parts.
    $compoundFinals = array(
        3  => 'ㄱㅅ',
        5  => 'ㄴㅈ',
        6  => 'ㄴㅎ',
        9  => 'ㄹㄱ',
        10 => 'ㄹㅁ',
        11 => 'ㄹㅂ',
        12 => 'ㄹㅅ',
        13 => 'ㄹㅌ',
        14 => 'ㄹㅍ',
        15 => 'ㄹㅎ',
        18 => 'ㅂㅅ',
    );

    $result = array();

    foreach (strToArray($str) as $char) {
        // Characters outside U+0080..U+FFFF are not turned into an entity,
        // so they cannot be Hangul syllables and are passed through.
        if (!preg_match('/\A&#(\d+);\z/', utfCharToNumber($char), $entity)) {
            $result[] = $char;
            continue;
        }

        $codePoint = (int) $entity[1];

        // Only "AC00:가" .. "D7A3:힣" (the precomposed syllables) are decomposed.
        if ($codePoint < 0xAC00 || $codePoint > 0xD7A3) {
            $result[] = $char;
            continue;
        }

        // syllable = 0xAC00 + (initial * 21 + vowel) * 28 + final
        $offset  = $codePoint - 0xAC00;
        $initial = (int) ($offset / (21 * 28));
        $offset  = $offset % (21 * 28);
        $vowel   = (int) ($offset / 28);
        $final   = $offset % 28;

        $result[] = mb_chr($ChoSeong[$initial], 'UTF-8');

        $result[] = isset($compoundVowels[$vowel])
            ? $compoundVowels[$vowel]
            : mb_chr($JungSeong[$vowel], 'UTF-8');

        // Final index 0 means the syllable has no final consonant.
        if ($final !== 0) {
            $result[] = isset($compoundFinals[$final])
                ? $compoundFinals[$final]
                : mb_chr($JongSeong[$final], 'UTF-8');
        }
    }

    return $result;
}