:art: Add cn2an

huangqimin001 преди 9 месеца
родител
ревизия
8bdd2f3279
променени са 9 файла, в които са добавени 1108 реда и са изтрити 0 реда
  1. 16 0
      utils/cn2an/__init__.py
  2. 204 0
      utils/cn2an/an2cn.py
  3. 71 0
      utils/cn2an/an2cn_test.py
  4. 294 0
      utils/cn2an/cn2an.py
  5. 215 0
      utils/cn2an/cn2an_test.py
  6. 135 0
      utils/cn2an/conf.py
  7. 29 0
      utils/cn2an/performance.py
  8. 104 0
      utils/cn2an/transform.py
  9. 40 0
      utils/cn2an/transform_test.py

+ 16 - 0
utils/cn2an/__init__.py

@@ -0,0 +1,16 @@
1
+__version__ = "0.5.22"
2
+
3
+from .cn2an import Cn2An
4
+from .an2cn import An2Cn
5
+from .transform import Transform
6
+
7
# Module-level convenience callables. Each name is the bound method of a
# converter instance that is created once, when the package is imported.
cn2an = Cn2An().cn2an
an2cn = An2Cn().an2cn
transform = Transform().transform

# Names exported by a star-import of this package.
__all__ = ["__version__", "cn2an", "an2cn", "transform"]

+ 204 - 0
utils/cn2an/an2cn.py

@@ -0,0 +1,204 @@
1
+from typing import Union
2
+from warnings import warn
3
+
4
+from proces import preprocess
5
+
6
+from .conf import NUMBER_LOW_AN2CN, NUMBER_UP_AN2CN, UNIT_LOW_ORDER_AN2CN, UNIT_UP_ORDER_AN2CN
7
+
8
+
9
class An2Cn(object):
    """Convert Arabic numerals to Chinese numerals.

    The public entry point is :meth:`an2cn`. Every other method is a private
    helper of that entry point.
    """

    def __init__(self) -> None:
        # Characters the input validator accepts as digits.
        self.all_num = "0123456789"
        # Digit -> Chinese numeral tables (lower-case and upper-case forms).
        self.number_low = NUMBER_LOW_AN2CN
        self.number_up = NUMBER_UP_AN2CN
        # Conversion modes accepted by an2cn().
        self.mode_list = ["low", "up", "rmb", "direct"]

    def an2cn(self, inputs: Union[str, int, float] = None, mode: str = "low") -> str:
        """Convert an Arabic number to a Chinese numeral string.

        :param inputs: the Arabic number, as ``int``, ``float`` or ``str``
        :param mode: ``low`` lower-case numerals, ``up`` upper-case numerals,
            ``rmb`` upper-case RMB amount, ``direct`` digit-by-digit conversion
        :return: the Chinese numeral string
        :raises ValueError: if ``inputs`` is empty, ``mode`` is unknown, or the
            input contains characters or a layout that cannot be converted
        """
        if inputs is None or inputs == "":
            raise ValueError("输入数据为空!")
        if mode not in self.mode_list:
            raise ValueError(f"mode 仅支持 {str(self.mode_list)} !")

        # Numbers are stringified first. Python normalises literals on the
        # way: 1. -> "1.0", 1.00 -> "1.0", -0 -> "0".
        if not isinstance(inputs, str):
            inputs = self.__number_to_string(inputs)

        # Pre-processing: traditional -> simplified characters, then
        # full-width -> half-width characters.
        inputs = preprocess(inputs, pipelines=[
            "traditional_to_simplified",
            "full_angle_to_half_angle"
        ])

        self.__check_inputs_is_valid(inputs)

        # Split off the sign.
        if inputs[0] == "-":
            sign = "负"
            inputs = inputs[1:]
        else:
            sign = ""

        if not inputs:
            # A lone "-" has no digits; previously "direct" mode returned a
            # bare sign character for it.
            raise ValueError("输入格式错误:-!")

        if mode == "direct":
            return sign + self.__direct_convert(inputs)

        # Separate the integer part from the (optional) decimal part.
        split_result = inputs.split(".")
        if len(split_result) > 2:
            raise ValueError(f"输入格式错误:{inputs}!")
        integer_data = split_result[0]
        decimal_data = split_result[1] if len(split_result) == 2 else None

        if mode == "rmb":
            output = self.__rmb_convert(integer_data, decimal_data)
        elif decimal_data is None:
            output = self.__integer_convert(integer_data, mode)
        else:
            output = self.__integer_convert(integer_data, mode) + self.__decimal_convert(decimal_data, mode)

        return sign + output

    def __rmb_convert(self, integer_data: str, decimal_data: Union[str, None]) -> str:
        """Build an upper-case RMB amount from integer and decimal digit strings.

        Only the first two decimal digits (jiao, fen) are used; further digits
        are ignored.
        """
        int_data = self.__integer_convert(integer_data, "up")
        if decimal_data is None:
            return int_data + "元整"

        # dec_data is either "" or "点" followed by one numeral per digit.
        dec_data = self.__decimal_convert(decimal_data, "up")
        if len(dec_data) == 0:
            return int_data + "元整"
        if len(dec_data) == 1:
            raise ValueError(f"异常输出:{dec_data}")

        jiao = dec_data[1]
        fen = dec_data[2] if len(dec_data) > 2 else "零"
        if jiao == "零" and fen == "零":
            return int_data + "元整"

        # A zero integer part is dropped entirely (e.g. 0.1 -> "壹角").
        yuan = "" if int_data == "零" else int_data + "元"
        if jiao == "零":
            # The filler "零" between yuan and fen is only written when a
            # yuan part is present.
            return (yuan + "零" if yuan else "") + fen + "分"

        output = yuan + jiao + "角"
        if fen != "零":
            output += fen + "分"
        return output

    def __direct_convert(self, inputs: str) -> str:
        """Map every digit to its lower-case numeral and "." to "点"."""
        return "".join("点" if d == "." else self.number_low[int(d)] for d in inputs)

    @staticmethod
    def __number_to_string(number_data: Union[int, float]) -> str:
        """Return a plain positional decimal string for ``number_data``.

        ``str()`` switches to scientific notation for very small or very large
        floats (``str(0.00005) == "5e-05"``). Such values are expanded so that
        the result contains only an optional sign, digits and at most one ".".
        """
        string_data = str(number_data)
        if "e" in string_data.lower():
            # Decimal parses the exponent form exactly and the "f" format
            # spec renders it without an exponent, keeping sign and mantissa
            # digits in the right place.
            from decimal import Decimal
            string_data = format(Decimal(string_data), "f")
        return string_data

    def __check_inputs_is_valid(self, check_data: str) -> None:
        """Raise ``ValueError`` if any character is not a digit, "." or "-"."""
        all_check_keys = self.all_num + ".-"
        for data in check_data:
            if data not in all_check_keys:
                raise ValueError(f"输入的数据不在转化范围内:{data}!")

    def __integer_convert(self, integer_data: str, mode: str) -> str:
        """Convert an integer digit string to Chinese numerals with units.

        :param integer_data: digits of the integer part
        :param mode: ``low`` or ``up``
        :raises ValueError: on an unknown mode, or when the number has more
            digits than the unit table has entries
        """
        if mode == "low":
            numeral_list = NUMBER_LOW_AN2CN
            unit_list = UNIT_LOW_ORDER_AN2CN
        elif mode == "up":
            numeral_list = NUMBER_UP_AN2CN
            unit_list = UNIT_UP_ORDER_AN2CN
        else:
            raise ValueError(f"error mode: {mode}")

        # Drop leading zeros, e.g. 007 => 7.
        integer_data = str(int(integer_data))

        len_integer_data = len(integer_data)
        if len_integer_data > len(unit_list):
            raise ValueError(f"超出数据范围,最长支持 {len(unit_list)} 位")

        output_an = ""
        for i, d in enumerate(integer_data):
            digit = int(d)
            unit_index = len_integer_data - i - 1
            if digit:
                output_an += numeral_list[digit] + unit_list[unit_index]
            else:
                # Every fourth position carries a section unit (万 / 亿) that
                # must be emitted even when the digit itself is zero.
                if not unit_index % 4:
                    output_an += numeral_list[digit] + unit_list[unit_index]

                # Emit at most one "零" for a run of zeros.
                if i > 0 and not output_an[-1] == "零":
                    output_an += numeral_list[digit]

        # Collapse the zero / unit artefacts produced by the loop above.
        output_an = output_an.replace("零零", "零").replace("零万", "万").replace("零亿", "亿").replace("亿万", "亿") \
            .strip("零")

        # Drop the leading "一" of a leading "一十".
        if output_an[:2] in ["一十"]:
            output_an = output_an[1:]

        # Everything was stripped: the integer part is zero (e.g. 0.x).
        if not output_an:
            output_an = "零"

        return output_an

    def __decimal_convert(self, decimal_data: str, o_mode: str) -> str:
        """Convert decimal digits to "点" followed by one numeral per digit.

        Returns "" for an empty ``decimal_data``. More than 16 digits triggers
        a warning and only the first 16 are converted.

        :raises ValueError: on an unknown ``o_mode``
        """
        len_decimal_data = len(decimal_data)

        if len_decimal_data > 16:
            warn(f"注意:小数部分长度为 {len_decimal_data} ,将自动截取前 16 位有效精度!")
            decimal_data = decimal_data[:16]

        if len_decimal_data:
            output_an = "点"
        else:
            output_an = ""

        if o_mode == "low":
            numeral_list = NUMBER_LOW_AN2CN
        elif o_mode == "up":
            numeral_list = NUMBER_UP_AN2CN
        else:
            raise ValueError(f"error mode: {o_mode}")

        for data in decimal_data:
            output_an += numeral_list[int(data)]
        return output_an

+ 71 - 0
utils/cn2an/an2cn_test.py

@@ -0,0 +1,71 @@
1
+import unittest
2
+
3
+from .an2cn import An2Cn
4
+
5
+
6
class An2CnTest(unittest.TestCase):
    """Tests for ``An2Cn.an2cn`` across all four conversion modes."""

    def setUp(self) -> None:
        # input -> [low, up, rmb, direct] expected outputs.
        self.input_data = {
            0: ["零", "零", "零元整", "零"],
            1: ["一", "壹", "壹元整", "一"],
            11: ["十一", "壹拾壹", "壹拾壹元整", "一一"],
            1000000: ["一百万", "壹佰万", "壹佰万元整", "一零零零零零零"],
            1000054: ["一百万零五十四", "壹佰万零伍拾肆", "壹佰万零伍拾肆元整", "一零零零零五四"],
            31000054: ["三千一百万零五十四", "叁仟壹佰万零伍拾肆", "叁仟壹佰万零伍拾肆元整", "三一零零零零五四"],
            9876543298765432: [
                "九千八百七十六万五千四百三十二亿九千八百七十六万五千四百三十二",
                "玖仟捌佰柒拾陆万伍仟肆佰叁拾贰亿玖仟捌佰柒拾陆万伍仟肆佰叁拾贰",
                "玖仟捌佰柒拾陆万伍仟肆佰叁拾贰亿玖仟捌佰柒拾陆万伍仟肆佰叁拾贰元整",
                "九八七六五四三二九八七六五四三二"
            ],
            10000000000000: ["十万亿", "壹拾万亿", "壹拾万亿元整", "一零零零零零零零零零零零零零"],
            # -0 == 0, so this entry shares (and overwrites) the key 0 above.
            -0: ["零", "零", "零元整", "零"],
            -1: ["负一", "负壹", "负壹元整", "负一"],
            -11: ["负十一", "负壹拾壹", "负壹拾壹元整", "负一一"],
            0.000500050005005: [
                "零点零零零五零零零五零零零五零零五",
                "零点零零零伍零零零伍零零零伍零零伍",
                "零元整",
                "零点零零零五零零零五零零零五零零五"
            ],
            0.00005: ["零点零零零零五", "零点零零零零伍", "零元整", "零点零零零零五"],
            0.4321: ["零点四三二一", "零点肆叁贰壹", "肆角叁分", "零点四三二一"],
            1000054.4321: [
                "一百万零五十四点四三二一",
                "壹佰万零伍拾肆点肆叁贰壹",
                "壹佰万零伍拾肆元肆角叁分",
                "一零零零零五四点四三二一"
            ],
            1.01: ["一点零一", "壹点零壹", "壹元零壹分", "一点零一"],
            1.2: ["一点二", "壹点贰", "壹元贰角", "一点二"],
            0.01: ["零点零一", "零点零壹", "壹分", "零点零一"],
            -0.1: ["负零点一", "负零点壹", "负壹角", "负零点一"],
            1.10: ["一点一", "壹点壹", "壹元壹角", "一点一"],
            12.0: ["十二点零", "壹拾贰点零", "壹拾贰元整", "一二点零"],
            2.0: ["二点零", "贰点零", "贰元整", "二点零"],
            0.10: ["零点一", "零点壹", "壹角", "零点一"]
        }

        # Inputs that must be rejected with ValueError.
        self.error_input_data = [
            "123.1.1",
            "0.1零"
        ]

        self.ac = An2Cn()

    def test_an2cn(self) -> None:
        for item, (low, up, rmb, direct) in self.input_data.items():
            with self.subTest(item=item):
                self.assertEqual(self.ac.an2cn(item), low)
                self.assertEqual(self.ac.an2cn(item, "low"), low)
                self.assertEqual(self.ac.an2cn(item, "up"), up)
                self.assertEqual(self.ac.an2cn(item, "rmb"), rmb)
                self.assertEqual(self.ac.an2cn(item, "direct"), direct)

        # assertRaises must wrap each call separately: a single context
        # around the whole loop exits on the first raise and never reaches
        # the remaining inputs.
        for error_data in self.error_input_data:
            with self.subTest(error_data=error_data):
                with self.assertRaises(ValueError):
                    self.ac.an2cn(error_data)


if __name__ == '__main__':
    unittest.main()

+ 294 - 0
utils/cn2an/cn2an.py

@@ -0,0 +1,294 @@
1
+import re
2
+from warnings import warn
3
+from typing import Union
4
+
5
+from proces import preprocess
6
+
7
+from .an2cn import An2Cn
8
+from .conf import NUMBER_CN2AN, UNIT_CN2AN, STRICT_CN_NUMBER, NORMAL_CN_NUMBER, NUMBER_LOW_AN2CN, UNIT_LOW_AN2CN
9
+
10
+
11
class Cn2An(object):
    """Convert Chinese numerals to Arabic numbers.

    The public entry point is :meth:`cn2an`. Every other method is a private
    helper of that entry point.
    """

    def __init__(self) -> None:
        # Character sets used to build the regular expressions below.
        self.all_num = "".join(list(NUMBER_CN2AN.keys()))
        self.all_unit = "".join(list(UNIT_CN2AN.keys()))
        self.strict_cn_number = STRICT_CN_NUMBER
        self.normal_cn_number = NORMAL_CN_NUMBER
        # Characters allowed in the (pre-processed) input, per mode.
        self.check_key_dict = {
            "strict": "".join(self.strict_cn_number.values()) + "点负",
            "normal": "".join(self.normal_cn_number.values()) + "点负",
            "smart": "".join(self.normal_cn_number.values()) + "点负" + "01234567890.-"
        }
        self.pattern_dict = self.__get_pattern()
        self.ac = An2Cn()
        self.mode_list = ["strict", "normal", "smart"]
        # "<anything>元/圆<digit>角[<digit>分]" money notation.
        self.yjf_pattern = re.compile(fr"^.*?[元圆][{self.all_num}]角([{self.all_num}]分)?$")
        # Arabic number with an optional trailing Chinese unit, e.g. "10.1万".
        self.pattern1 = re.compile(fr"^-?\d+(\.\d+)?[{self.all_unit}]?$")
        # A run made only of numeral characters, e.g. "一二三".
        self.ptn_all_num = re.compile(f"^[{self.all_num}]+$")
        # "十?" is for special case "十一万三"
        self.ptn_speaking_mode = re.compile(f"^([{self.all_num}]{{0,2}}[{self.all_unit}])+[{self.all_num}]$")

    def cn2an(self, inputs: Union[str, int, float] = None, mode: str = "strict") -> Union[float, int]:
        """Convert a Chinese numeral to an Arabic number.

        :param inputs: Chinese numerals, Arabic numerals, or a mix of both
        :param mode: ``strict``, ``normal`` or ``smart``
        :return: the Arabic number
        :raises ValueError: if ``inputs`` is ``None`` or empty, ``mode`` is
            unknown, or the input does not match the selected mode's format
        """
        # The original guard (``is not None or == ""``) let "" through, which
        # then failed with IndexError further down.
        if inputs is None or inputs == "":
            raise ValueError("输入数据为空!")
        if mode not in self.mode_list:
            raise ValueError(f"mode 仅支持 {str(self.mode_list)} !")

        # Numbers are converted via their string form.
        if not isinstance(inputs, str):
            inputs = str(inputs)

        # Pre-processing: traditional -> simplified characters, then
        # full-width -> half-width characters.
        inputs = preprocess(inputs, pipelines=[
            "traditional_to_simplified",
            "full_angle_to_half_angle"
        ])

        # Special form: 廿 is rewritten as 二十.
        inputs = inputs.replace("廿", "二十")

        # Validate and split the input.
        sign, integer_data, decimal_data, is_all_num = self.__check_input_data_is_valid(inputs, mode)

        # sign == 0 marks the smart-mode shortcut: integer_data already holds
        # the final numeric result.
        if sign == 0:
            return integer_data

        if is_all_num:
            output = self.__direct_convert(integer_data)
        else:
            output = self.__integer_convert(integer_data)

        if decimal_data is not None:
            output = output + self.__decimal_convert(decimal_data)
            # fix 1 + 0.57 = 1.5699999999999998
            output = round(output, len(decimal_data))

        return sign * output

    def __get_pattern(self) -> dict:
        """Build the compiled integer / decimal validation patterns per mode."""
        # Strict integer grammar, built up from small ranges to large ones.
        _0 = "[零]"
        _1_9 = "[一二三四五六七八九]"
        _10_99 = f"{_1_9}?[十]{_1_9}?"
        _1_99 = f"({_10_99}|{_1_9})"
        _100_999 = f"({_1_9}[百]([零]{_1_9})?|{_1_9}[百]{_10_99})"
        _1_999 = f"({_100_999}|{_1_99})"
        _1000_9999 = f"({_1_9}[千]([零]{_1_99})?|{_1_9}[千]{_100_999})"
        _1_9999 = f"({_1000_9999}|{_1_999})"
        _10000_99999999 = f"({_1_9999}[万]([零]{_1_999})?|{_1_9999}[万]{_1000_9999})"
        _1_99999999 = f"({_10000_99999999}|{_1_9999})"
        _100000000_9999999999999999 = f"({_1_99999999}[亿]([零]{_1_99999999})?|{_1_99999999}[亿]{_10000_99999999})"
        _1_9999999999999999 = f"({_100000000_9999999999999999}|{_1_99999999})"
        str_int_pattern = f"^({_0}|{_1_9999999999999999})$"
        nor_int_pattern = f"^({_0}|{_1_9999999999999999})$"

        str_dec_pattern = "^[零一二三四五六七八九]{0,15}[一二三四五六七八九]$"
        nor_dec_pattern = "^[零一二三四五六七八九]{0,16}$"

        # Widen each canonical character to the set of variants the mode
        # accepts (the patterns only use them inside character classes).
        for str_num, str_variants in self.strict_cn_number.items():
            str_int_pattern = str_int_pattern.replace(str_num, str_variants)
            str_dec_pattern = str_dec_pattern.replace(str_num, str_variants)
        for nor_num, nor_variants in self.normal_cn_number.items():
            nor_int_pattern = nor_int_pattern.replace(nor_num, nor_variants)
            nor_dec_pattern = nor_dec_pattern.replace(nor_num, nor_variants)

        pattern_dict = {
            "strict": {
                "int": re.compile(str_int_pattern),
                "dec": re.compile(str_dec_pattern)
            },
            "normal": {
                "int": re.compile(nor_int_pattern),
                "dec": re.compile(nor_dec_pattern)
            }
        }
        return pattern_dict

    def __copy_num(self, num):
        """Map each Arabic digit of ``num`` to its lower-case Chinese numeral."""
        return "".join(NUMBER_LOW_AN2CN[int(n)] for n in num)

    def __decimal_is_valid(self, decimal_data, mode: str) -> bool:
        """Return True if ``decimal_data`` is None or fully matches the mode's decimal pattern."""
        if decimal_data is None:
            return True
        result_dec = self.pattern_dict[mode]["dec"].search(decimal_data)
        return bool(result_dec) and result_dec.group() == decimal_data

    def __check_input_data_is_valid(self, check_data: str, mode: str) -> (int, str, str, bool):
        """Validate ``check_data`` and split it into its parts.

        :return: ``(sign, integer_data, decimal_data, is_all_num)``. ``sign``
            is 1 or -1; ``decimal_data`` is ``None`` when there is no decimal
            part; ``is_all_num`` is True for a plain digit sequence. The
            smart-mode shortcut returns ``(0, number, None, None)`` where
            ``number`` is already the final result.
        :raises ValueError: if the data does not fit the mode's format
        """
        # Strip the suffixes 元整 / 圆整 / 元正 / 圆正.
        stop_words = ["元整", "圆整", "元正", "圆正"]
        for word in stop_words:
            if check_data.endswith(word):
                check_data = check_data[:-2]

        # Outside strict mode also strip a trailing 元 / 圆. endswith() keeps
        # this safe when the previous step left an empty string.
        if mode != "strict":
            normal_stop_words = ["圆", "元"]
            for word in normal_stop_words:
                if check_data.endswith(word):
                    check_data = check_data[:-1]

        # Money notation: turn "X元Y角Z分" into "X点YZ". The pattern accepts
        # both 元 and 圆, so both are rewritten to the decimal point.
        result = self.yjf_pattern.search(check_data)
        if result:
            check_data = check_data.replace("元", "点").replace("圆", "点").replace("角", "").replace("分", "")

        # Special wordings such as 一千零十一 and 一万零百一十一.
        if "零十" in check_data:
            check_data = check_data.replace("零十", "零一十")
        if "零百" in check_data:
            check_data = check_data.replace("零百", "零一百")

        for data in check_data:
            if data not in self.check_key_dict[mode]:
                raise ValueError(f"当前为{mode}模式,输入的数据不在转化范围内:{data}!")

        # Determine the sign. startswith() tolerates an empty string, which
        # is then rejected by the pattern checks below with ValueError.
        if check_data.startswith("负"):
            check_data = check_data[1:]
            sign = -1
        else:
            sign = 1

        if "点" in check_data:
            split_data = check_data.split("点")
            if len(split_data) == 2:
                integer_data, decimal_data = split_data
                # In smart mode, rewrite embedded Arabic digits as Chinese
                # numerals and continue with the normal-mode rules.
                if mode == "smart":
                    integer_data = re.sub(r"\d+", lambda x: self.ac.an2cn(x.group()), integer_data)
                    decimal_data = re.sub(r"\d+", lambda x: self.__copy_num(x.group()), decimal_data)
                    mode = "normal"
            else:
                raise ValueError("数据中包含不止一个点!")
        else:
            integer_data = check_data
            decimal_data = None
            # In smart mode, rewrite embedded Arabic digits as Chinese
            # numerals and continue with the normal-mode rules.
            if mode == "smart":
                # Shortcut for inputs like "10.1万" or "10.1".
                result1 = self.pattern1.search(integer_data)
                if result1:
                    if result1.group() == integer_data:
                        if integer_data[-1] in UNIT_CN2AN.keys():
                            output = int(float(integer_data[:-1]) * UNIT_CN2AN[integer_data[-1]])
                        else:
                            output = float(integer_data)
                        return 0, output, None, None

                integer_data = re.sub(r"\d+", lambda x: self.ac.an2cn(x.group()), integer_data)
                mode = "normal"

        result_int = self.pattern_dict[mode]["int"].search(integer_data)
        if result_int:
            if result_int.group() == integer_data and self.__decimal_is_valid(decimal_data, mode):
                return sign, integer_data, decimal_data, False
        elif mode == "strict":
            raise ValueError(f"不符合格式的数据:{integer_data}")
        elif mode == "normal":
            # Plain digit sequence, e.g. 一二三.
            result_all_num = self.ptn_all_num.search(integer_data)
            if result_all_num:
                if result_all_num.group() == integer_data and self.__decimal_is_valid(decimal_data, mode):
                    return sign, integer_data, decimal_data, True

            # Colloquial form: 一万二, 两千三, 三百四, 十三万六, 一百二十五万三.
            result_speaking_mode = self.ptn_speaking_mode.search(integer_data)
            if len(integer_data) >= 3 and result_speaking_mode and result_speaking_mode.group() == integer_data:
                # len(integer_data)>=3: because the minimum length of integer_data that can be matched is 3
                # to find the last unit
                last_unit = result_speaking_mode.groups()[-1][-1]
                # The trailing digit counts in the unit one step below the
                # last unit. Not every such value has an entry in the table;
                # treat that as malformed input rather than leaking KeyError.
                _unit = UNIT_LOW_AN2CN.get(UNIT_CN2AN[last_unit] // 10)
                if _unit is None:
                    raise ValueError(f"不符合格式的数据:{check_data}")
                integer_data = integer_data + _unit
                if self.__decimal_is_valid(decimal_data, mode):
                    return sign, integer_data, decimal_data, False

        raise ValueError(f"不符合格式的数据:{check_data}")

    def __integer_convert(self, integer_data: str) -> int:
        """Convert a Chinese integer (numerals plus units) to ``int``.

        The string is scanned from the least significant character; ``unit``
        tracks the multiplier that applies to the next numeral.
        """
        output_integer = 0
        unit = 1
        ten_thousand_unit = 1
        for index, cn_num in enumerate(reversed(integer_data)):
            # Numeral
            if cn_num in NUMBER_CN2AN:
                num = NUMBER_CN2AN[cn_num]
                output_integer += num * unit
            # Unit
            elif cn_num in UNIT_CN2AN:
                unit = UNIT_CN2AN[cn_num]
                # Section units: 万, 亿, 万亿
                if unit % 10000 == 0:
                    # 万 / 亿
                    if unit > ten_thousand_unit:
                        ten_thousand_unit = unit
                    # 万亿
                    else:
                        ten_thousand_unit = unit * ten_thousand_unit
                        unit = ten_thousand_unit

                # Small units (十/百/千) are scaled by the current section.
                if unit < ten_thousand_unit:
                    unit = unit * ten_thousand_unit

                # A leading unit with no numeral in front of it (e.g. 十一)
                # implicitly counts once.
                if index == len(integer_data) - 1:
                    output_integer += unit
            else:
                raise ValueError(f"{cn_num} 不在转化范围内")

        return int(output_integer)

    def __decimal_convert(self, decimal_data: str) -> float:
        """Convert the Chinese digits after the decimal point to a fraction.

        More than 16 digits triggers a warning and only the first 16 are used.
        """
        len_decimal_data = len(decimal_data)

        if len_decimal_data > 16:
            warn(f"注意:小数部分长度为 {len_decimal_data} ,将自动截取前 16 位有效精度!")
            decimal_data = decimal_data[:16]
            len_decimal_data = 16

        output_decimal = 0
        # Summed from the last digit to the first.
        for index in range(len(decimal_data) - 1, -1, -1):
            unit_key = NUMBER_CN2AN[decimal_data[index]]
            output_decimal += unit_key * 10 ** -(index + 1)

        # Round away accumulated floating-point error.
        output_decimal = round(output_decimal, len_decimal_data)

        return output_decimal

    def __direct_convert(self, data: str) -> int:
        """Convert a plain sequence of Chinese digits positionally (一二三 -> 123)."""
        output_data = 0
        for index in range(len(data) - 1, -1, -1):
            unit_key = NUMBER_CN2AN[data[index]]
            output_data += unit_key * 10 ** (len(data) - index - 1)

        return output_data

+ 215 - 0
utils/cn2an/cn2an_test.py

@@ -0,0 +1,215 @@
1
+import unittest
2
+
3
+from .cn2an import Cn2An
4
+
5
+
6
class Cn2anTest(unittest.TestCase):
    """Tests for ``Cn2An.cn2an`` in strict, normal and smart mode."""

    def setUp(self) -> None:
        # Inputs valid in strict mode (and therefore in every mode).
        self.strict_data_dict = {
            "零": 0,
            "一": 1,
            "十": 10,
            "十一": 11,
            "一十一": 11,
            "二十": 20,
            "二十一": 21,
            "一百": 100,
            "一百零一": 101,
            "一百一十": 110,
            "一百一十一": 111,
            "一千": 1000,
            "一千一百": 1100,
            "一千一百一十": 1110,
            "一千一百一十一": 1111,
            "一千零一十": 1010,
            "一千零十": 1010,
            "一千零十一": 1011,
            "一千零一十一": 1011,
            "一千零一": 1001,
            "一千一百零一": 1101,
            "一万一千一百一十一": 11111,
            "一十一万一千一百一十一": 111111,
            "一百一十一万一千一百一十一": 1111111,
            "一千一百一十一万一千一百一十一": 11111111,
            "一亿一千一百一十一万一千一百一十一": 111111111,
            "一十一亿一千一百一十一万一千一百一十一": 1111111111,
            "一百一十一亿一千一百一十一万一千一百一十一": 11111111111,
            "一千一百一十一亿一千一百一十一万一千一百一十一": 111111111111,
            "一千一百一十一万一千一百一十一亿一千一百一十一万一千一百一十一": 1111111111111111,
            "壹": 1,
            "拾": 10,
            "拾壹": 11,
            "壹拾壹": 11,
            "壹佰壹拾壹": 111,
            "壹仟壹佰壹拾壹": 1111,
            "壹万壹仟壹佰壹拾壹": 11111,
            "壹拾壹万壹仟壹佰壹拾壹": 111111,
            "壹佰壹拾壹万壹仟壹佰壹拾壹": 1111111,
            "壹仟壹佰壹拾壹万壹仟壹佰壹拾壹": 11111111,
            "壹亿壹仟壹佰壹拾壹万壹仟壹佰壹拾壹": 111111111,
            "壹拾壹亿壹仟壹佰壹拾壹万壹仟壹佰壹拾壹": 1111111111,
            "壹佰壹拾壹亿壹仟壹佰壹拾壹万壹仟壹佰壹拾壹": 11111111111,
            "壹仟壹佰壹拾壹亿壹仟壹佰壹拾壹万壹仟壹佰壹拾壹": 111111111111,
            "壹拾壹元整": 11,
            "壹佰壹拾壹圆整": 111,
            "壹拾壹元正": 11,
            "壹拾壹圆正": 11,
            "壹拾壹元壹角": 11.1,
            "壹拾壹元壹角壹分": 11.11,
            "十万": 100000,
            "十万零一": 100001,
            "一万零一": 10001,
            "一万零一十一": 10011,
            "一万零一百一十一": 10111,
            "一万零百一十一": 10111,
            "一十万零一": 100001,
            "一百万零一": 1000001,
            "一千万零一": 10000001,
            "一千零一万一千零一": 10011001,
            "一千零一万零一": 10010001,
            "一亿零一": 100000001,
            "一十亿零一": 1000000001,
            "一百亿零一": 10000000001,
            "一千零一亿一千零一万一千零一": 100110011001,
            "一千亿一千万一千零一": 100010001001,
            "一千亿零一": 100000000001,
            "零点零零零零零零零零零零零零零零一": 0.000000000000001,
            "零点零零零零零零零零零零零零零一": 0.00000000000001,
            "零点零零零零零零零零零零零零一": 0.0000000000001,
            "零点零零零零零零零零零零零一": 0.000000000001,
            "零点零零零零零零零零零零一": 0.00000000001,
            "零点零零零零零零零零零一": 0.0000000001,
            "零点零零零零零零零零一": 0.000000001,
            "零点零零零零零零零一": 0.00000001,
            "零点零零零零零零一": 0.0000001,
            "零点零零零零零一": 0.000001,
            "零点零零零零一": 0.00001,
            "零点零零零一": 0.0001,
            "零点零零一": 0.001,
            "零点零一": 0.01,
            "零点一": 0.1,
            "负一": -1,
            "负二": -2,
            "负十": -10,
            "负十一": -11,
            "负一十一": -11,
            # Archaic form
            "廿二": 22,
        }

        # Inputs that additionally become valid in normal mode.
        self.normal_data_dict = {
            "一一": 11,
            "一一一": 111,
            "壹壹": 11,
            "壹壹壹": 111,
            "零点零": 0,
            "零点零零": 0,
            "一七二零": 1720,
            "一七二零点一": 1720.1,
            "一七二零点一三四": 1720.134,
            "一二三": 123,
            "负零点一零": -0.1,
            "负一七二零": -1720,
            "负一七二零点一": -1720.1,
            # Colloquial forms
            "三万五": 35000,
            "十三万五": 135000,
            "两千六": 2600,
            "一百二": 120,
            "一百二十万三": 1203000,
            # Traditional characters
            "兩千六": 2600,
            # Upper-case numerals
            "壹拾壹元": 11,
            "壹佰壹拾壹圆": 111,
            "壹拾壹圆": 11,
            # Special
            "〇": 0,
        }

        # Inputs that additionally become valid in smart mode.
        self.smart_data_dict = {
            "100万": 1000000,
            "100万三千": 1003000,
            "200亿零四千230": 20000004230,
            "一百点123": 100.123,
            "10.1万": 101000,
            "-10.1万": -101000,
            "35.1亿": 3510000000,
            "10.1": 10.1,
            "-10.1": -10.1,
        }

        self.error_smart_datas = [
            "10.1万零100",
            "10..1万",
        ]

        self.error_normal_datas = [
            "零点",
            "点零",
            "零点点",
            "零点零大",
        ]
        self.error_normal_datas.extend(self.error_smart_datas)
        self.error_normal_datas.extend(list(self.smart_data_dict.keys()))

        self.error_strict_datas = [
            "一一",
            "壹壹",
            "零点",
            "点零",
            "点一",
            "百十一",
            "十一十二",
            "负十一十二",
            "十七十八",
        ]
        self.error_strict_datas.extend(self.error_normal_datas)
        self.error_strict_datas.extend(list(self.normal_data_dict.keys()))

        # Do not move: the error lists above must be built before the valid
        # dictionaries are merged into each other.
        self.normal_data_dict.update(self.strict_data_dict)
        self.smart_data_dict.update(self.normal_data_dict)

        self.ca = Cn2An()

    def test_cn2an(self) -> None:
        valid_cases = (
            ("strict", self.strict_data_dict),
            ("normal", self.normal_data_dict),
            ("smart", self.smart_data_dict),
        )
        for mode, data_dict in valid_cases:
            for item, expected in data_dict.items():
                with self.subTest(mode=mode, item=item):
                    self.assertEqual(self.ca.cn2an(item, mode), expected)

        # NOTE(review): every error list is exercised with the default mode
        # (strict), exactly as before; confirm whether the normal / smart
        # lists were meant to run in their own modes.
        error_cases = (
            ("strict", self.error_strict_datas),
            ("normal", self.error_normal_datas),
            ("smart", self.error_smart_datas),
        )
        for error_list, error_items in error_cases:
            for error_item in error_items:
                with self.subTest(error_list=error_list, item=error_item):
                    with self.assertRaises(ValueError):
                        self.ca.cn2an(error_item)


if __name__ == '__main__':
    unittest.main()

+ 135 - 0
utils/cn2an/conf.py

@@ -0,0 +1,135 @@
1
# Chinese digit character -> integer value.
# Each row lists every accepted spelling of one digit: the everyday form,
# the formal (financial) form, and colloquial variants where they exist.
NUMBER_CN2AN = {
    "零": 0, "〇": 0,
    "一": 1, "壹": 1, "幺": 1,
    "二": 2, "贰": 2, "两": 2,
    # "仨" is listed as a spelling of 三 in NORMAL_CN_NUMBER, so it needs a
    # value here as well.
    "三": 3, "叁": 3, "仨": 3,
    "四": 4, "肆": 4,
    "五": 5, "伍": 5,
    "六": 6, "陆": 6,
    "七": 7, "柒": 7,
    "八": 8, "捌": 8,
    "九": 9, "玖": 9,
}
25
# Chinese unit character -> multiplier (everyday and formal spellings).
UNIT_CN2AN = {
    "十": 10, "拾": 10,
    "百": 10 ** 2, "佰": 10 ** 2,
    "千": 10 ** 3, "仟": 10 ** 3,
    "万": 10 ** 4,
    "亿": 10 ** 8,
}
35
# Multiplier -> everyday (lower-case) Chinese unit character.
UNIT_LOW_AN2CN = {
    10 ** 1: "十",
    10 ** 2: "百",
    10 ** 3: "千",
    10 ** 4: "万",
    10 ** 8: "亿",
}
42
# Digit 0-9 -> everyday (lower-case) Chinese character; the position of each
# character in the string is the digit it stands for.
NUMBER_LOW_AN2CN = dict(enumerate("零一二三四五六七八九"))
54
# Digit 0-9 -> formal (upper-case) Chinese character; the position of each
# character in the string is the digit it stands for.
NUMBER_UP_AN2CN = dict(enumerate("零壹贰叁肆伍陆柒捌玖"))
66
# Everyday unit character for each decimal position, least significant first
# (index 0 is the ones place and carries no unit).  One row per group of four
# digits.
UNIT_LOW_ORDER_AN2CN = [
    "", "十", "百", "千",
    "万", "十", "百", "千",
    "亿", "十", "百", "千",
    "万", "十", "百", "千",
]
84
# Formal unit character for each decimal position, least significant first
# (index 0 is the ones place and carries no unit).  One row per group of four
# digits.
UNIT_UP_ORDER_AN2CN = [
    "", "拾", "佰", "仟",
    "万", "拾", "佰", "仟",
    "亿", "拾", "佰", "仟",
    "万", "拾", "佰", "仟",
]
102
# Canonical character -> string of the spellings listed for it in the
# "strict" table (everyday and formal forms only).
STRICT_CN_NUMBER = {
    "零": "零",
    "一": "一壹", "二": "二贰", "三": "三叁",
    "四": "四肆", "五": "五伍", "六": "六陆",
    "七": "七柒", "八": "八捌", "九": "九玖",
    "十": "十拾", "百": "百佰", "千": "千仟",
    "万": "万", "亿": "亿",
}
119
# Canonical character -> string of the spellings listed for it in the
# "normal" table: the everyday and formal forms plus the colloquial variants
# 〇, 幺, 两 and 仨.
NORMAL_CN_NUMBER = {
    "零": "零〇",
    "一": "一壹幺", "二": "二贰两", "三": "三叁仨",
    "四": "四肆", "五": "五伍", "六": "六陆",
    "七": "七柒", "八": "八捌", "九": "九玖",
    "十": "十拾", "百": "百佰", "千": "千仟",
    "万": "万", "亿": "亿",
}

+ 29 - 0
utils/cn2an/performance.py

@@ -0,0 +1,29 @@
1
+import torbjorn as tbn
2
+
3
+from .an2cn import An2Cn
4
+from .cn2an import Cn2An
5
+
6
# One shared converter per direction, built once so the timed loops below
# measure conversion only.
ac, ca = An2Cn(), Cn2An()

# The same 16-digit value written in Arabic and in Chinese numerals; each
# benchmark converts one form and compares the result with the other.
an = 9876543298765432
cn = "九千八百七十六万五千四百三十二亿九千八百七十六万五千四百三十二"
11
+
12
+
13
@tbn.run_time  # presumably reports the wall-clock time of the call -- verify against torbjorn
def run_cn2an_ten_thousand_times() -> None:
    """Convert the sample Chinese numeral ``cn`` 10000 times, checking each result against ``an``."""
    iterations = 10000
    for _round in range(iterations):
        converted = ca.cn2an(cn)
        assert converted == an
18
+
19
+
20
@tbn.run_time  # presumably reports the wall-clock time of the call -- verify against torbjorn
def run_an2cn_ten_thousand_times() -> None:
    """Convert the sample integer ``an`` 10000 times, checking each result against ``cn``."""
    iterations = 10000
    for _round in range(iterations):
        converted = ac.an2cn(an)
        assert converted == cn
25
+
26
+
27
# Run both benchmarks, Chinese -> Arabic first, when executed as a script.
if __name__ == "__main__":
    run_cn2an_ten_thousand_times()
    run_an2cn_ten_thousand_times()

+ 104 - 0
utils/cn2an/transform.py

@@ -0,0 +1,104 @@
1
+import re
2
+from warnings import warn
3
+
4
+from .cn2an import Cn2An
5
+from .an2cn import An2Cn
6
+from .conf import UNIT_CN2AN
7
+
8
+
9
class Transform(object):
    """Rewrite the numbers embedded in free text between Chinese and Arabic form.

    ``transform`` runs a fixed sequence of regex passes over the sentence
    (date, fraction, percent, Celsius, plain number) and hands every match to
    the ``Cn2An`` / ``An2Cn`` converters, e.g. "小王捡了100块钱" <->
    "小王捡了一百块钱".
    """

    def __init__(self) -> None:
        # Digits recognised inside a sentence (everyday spellings only).
        self.all_num = "零一二三四五六七八九"
        self.all_unit = "".join(UNIT_CN2AN)
        self.cn2an = Cn2An().cn2an
        self.an2cn = An2Cn().an2cn
        # Optional "负" sign, optional integer part followed by "点", then digits/units.
        self.cn_pattern = f"负?([{self.all_num}{self.all_unit}]+点)?[{self.all_num}{self.all_unit}]+"
        # Arabic digits followed by Chinese units, e.g. "2.5亿".  The decimal
        # point is escaped so that only a literal "." is accepted.
        self.smart_cn_pattern = fr"-?([0-9]+\.)?[0-9]+[{self.all_unit}]+"

    def transform(self, inputs: str, method: str = "cn2an") -> str:
        """Convert every number found in ``inputs``.

        :param inputs: sentence to rewrite
        :param method: "cn2an" (Chinese numerals -> Arabic) or "an2cn" (the reverse)
        :return: the sentence with its numbers converted
        :raises ValueError: if ``method`` is neither "cn2an" nor "an2cn"
        """
        if method == "cn2an":
            # Normalise colloquial forms before matching.
            inputs = inputs.replace("廿", "二十").replace("半", "0.5").replace("两", "2")
            stages = (
                (fr"((({self.smart_cn_pattern})|({self.cn_pattern}))年)?"
                 fr"([{self.all_num}十]+月)?([{self.all_num}十]+日)?", "date"),
                (fr"{self.cn_pattern}分之{self.cn_pattern}", "fraction"),
                (fr"百分之{self.cn_pattern}", "percent"),
                (fr"{self.cn_pattern}摄氏度", "celsius"),
                (self.cn_pattern, "number"),
            )
        elif method == "an2cn":
            stages = (
                (r"(\d{2,4}年)?(\d{1,2}月)?(\d{1,2}日)?", "date"),
                (r"\d+/\d+", "fraction"),
                (r"-?(\d+\.)?\d+%", "percent"),
                # Accept decimals and a leading minus sign.  The sign is not
                # taken when it directly follows a digit, so a range such as
                # "1-3℃" keeps its hyphen.
                (r"(?<!\d)-?(\d+\.)?\d+℃", "celsius"),
                (r"-?(\d+\.)?\d+", "number"),
            )
        else:
            raise ValueError(f"error method: {method}, only support 'cn2an' and 'an2cn'!")

        # Order matters: the specific forms are rewritten before the generic
        # "number" pass can touch their digits.
        output = inputs
        for pattern, sub_mode in stages:
            output = re.sub(
                pattern,
                lambda m, sub_mode=sub_mode: self.__sub_util(m.group(), method, sub_mode),
                output)
        return output

    def __sub_util(self, inputs, method: str = "cn2an", sub_mode: str = "number") -> str:
        """Convert one regex match; on any failure warn and return it unchanged.

        :param inputs: the matched text (may be empty: the date patterns are fully optional)
        :param method: "cn2an", anything else is treated as "an2cn"
        :param sub_mode: "date", "fraction", "percent", "celsius" or "number"
        :return: the replacement text
        """
        if not inputs:
            # Always hand re.sub a string instead of relying on its handling of None.
            return ""
        try:
            if method == "cn2an":
                return self.__cn2an_piece(inputs, sub_mode)
            return self.__an2cn_piece(inputs, sub_mode)
        except Exception as e:
            # Deliberate best effort: a fragment that cannot be converted is
            # left as it was and reported through a warning.
            warn(str(e))
            return inputs

    def __cn2an_piece(self, inputs: str, sub_mode: str) -> str:
        """Rewrite the Chinese numerals of one matched fragment as Arabic numbers."""
        def to_number(match):
            return str(self.cn2an(match.group(), "smart"))

        if sub_mode == "date":
            return re.sub(fr"(({self.smart_cn_pattern})|({self.cn_pattern}))", to_number, inputs)
        if sub_mode == "fraction":
            if inputs[0] == "百":
                # "百分之…" is a percentage; leave it for the percent pass.
                return inputs
            converted = re.sub(self.cn_pattern, to_number, inputs)
            # Chinese states the denominator first: "二分之一" is 1/2.
            denominator, numerator = converted.split("分之")
            return f"{numerator}/{denominator}"
        if sub_mode == "percent":
            return re.sub(f"(?<=百分之){self.cn_pattern}", to_number, inputs).replace("百分之", "") + "%"
        if sub_mode == "celsius":
            return re.sub(f"{self.cn_pattern}(?=摄氏度)", to_number, inputs).replace("摄氏度", "℃")
        if sub_mode == "number":
            return str(self.cn2an(inputs, "smart"))
        raise ValueError(f"error sub_mode: {sub_mode} !")

    def __an2cn_piece(self, inputs: str, sub_mode: str) -> str:
        """Rewrite the Arabic numbers of one matched fragment as Chinese numerals."""
        if sub_mode == "date":
            # The year uses "direct" mode (2001 -> 二零零一 in the test data);
            # month and day use "low".
            inputs = re.sub(r"\d+(?=年)", lambda m: self.an2cn(m.group(), "direct"), inputs)
            return re.sub(r"\d+", lambda m: self.an2cn(m.group(), "low"), inputs)
        if sub_mode == "fraction":
            converted = re.sub(r"\d+", lambda m: self.an2cn(m.group(), "low"), inputs)
            numerator, denominator = converted.split("/")
            return f"{denominator}分之{numerator}"
        if sub_mode == "celsius":
            return self.an2cn(inputs[:-1], "low") + "摄氏度"
        if sub_mode == "percent":
            return "百分之" + self.an2cn(inputs[:-1], "low")
        if sub_mode == "number":
            return self.an2cn(inputs, "low")
        raise ValueError(f"error sub_mode: {sub_mode} !")

+ 40 - 0
utils/cn2an/transform_test.py

@@ -0,0 +1,40 @@
1
+import unittest
2
+
3
+from .transform import Transform
4
+
5
+
6
class TransformTest(unittest.TestCase):
    """Sentence-level checks for ``Transform.transform`` in both directions."""

    def setUp(self) -> None:
        # Arabic-numeral sentence -> Chinese-numeral sentence.  Every pair is
        # checked in both directions by test_transform.
        self.strict_data_dict = {
            "小王捡了100块钱": "小王捡了一百块钱",
            "用户增长最快的3个城市": "用户增长最快的三个城市",
            "小王的生日是2001年3月4日": "小王的生日是二零零一年三月四日",
            "小王的生日是2012年12月12日": "小王的生日是二零一二年十二月十二日",
            "今天股价上涨了8%": "今天股价上涨了百分之八",
            "第2天股价下降了-3.8%": "第二天股价下降了百分之负三点八",
            "抛出去的硬币为正面的概率是1/2": "抛出去的硬币为正面的概率是二分之一",
            "现在室内温度为39℃，很热啊！": "现在室内温度为三十九摄氏度，很热啊！",
            "创业板指9月9日早盘低开1.57%": "创业板指九月九日早盘低开百分之一点五七"
        }

        # Input sentence -> expected "cn2an" output; checked in that direction only.
        self.smart_data_dict = {
            "约2.5亿年~6500万年": "约250000000年~65000000年",
            "廿二日，日出东方": "22日，日出东方",
            "大陆": "大陆",
            "半斤": "0.5斤",
            "两个": "2个",
        }

        self.t = Transform()

    def test_transform(self) -> None:
        """Round-trip the strict pairs, then run the cn2an-only cases."""
        for arabic_text, chinese_text in self.strict_data_dict.items():
            self.assertEqual(self.t.transform(arabic_text, "an2cn"), chinese_text)
            self.assertEqual(self.t.transform(chinese_text, "cn2an"), arabic_text)

        for source_text, expected_text in self.smart_data_dict.items():
            self.assertEqual(self.t.transform(source_text, "cn2an"), expected_text)
37
+
38
+
39
# Run this module's test cases when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()