@@ -0,0 +1,16 @@ |
||
1 |
+__version__ = "0.5.22" |
|
2 |
+ |
|
3 |
+from .cn2an import Cn2An |
|
4 |
+from .an2cn import An2Cn |
|
5 |
+from .transform import Transform |
|
6 |
+ |
|
7 |
# Module-level convenience callables: each one is the bound conversion method
# of a converter instance that is created once, when the package is imported.
cn2an = Cn2An().cn2an
an2cn = An2Cn().an2cn
transform = Transform().transform

# Names exported by a star-import of this package.
__all__ = [
    "__version__",
    "cn2an",
    "an2cn",
    "transform"
]
@@ -0,0 +1,204 @@ |
||
1 |
+from typing import Union |
|
2 |
+from warnings import warn |
|
3 |
+ |
|
4 |
+from proces import preprocess |
|
5 |
+ |
|
6 |
+from .conf import NUMBER_LOW_AN2CN, NUMBER_UP_AN2CN, UNIT_LOW_ORDER_AN2CN, UNIT_UP_ORDER_AN2CN |
|
7 |
+ |
|
8 |
+ |
|
9 |
class An2Cn(object):
    """Convert Arabic numerals (int, float or numeric string) to Chinese numerals.

    Output modes, as listed in ``mode_list``:

    * ``low``    - lowercase Chinese numerals
    * ``up``     - uppercase (financial) Chinese numerals
    * ``rmb``    - uppercase numerals with 元 / 角 / 分 currency wording
    * ``direct`` - digit-by-digit transliteration
    """

    def __init__(self) -> None:
        # Characters accepted as digits by the input validator.
        self.all_num = "0123456789"
        self.number_low = NUMBER_LOW_AN2CN
        self.number_up = NUMBER_UP_AN2CN
        self.mode_list = ["low", "up", "rmb", "direct"]

    def an2cn(self, inputs: Union[str, int, float] = None, mode: str = "low") -> str:
        """Convert an Arabic number to a Chinese numeral string.

        :param inputs: Arabic number, given as ``int``, ``float`` or ``str``
        :param mode: one of ``low``, ``up``, ``rmb``, ``direct``
        :return: the Chinese numeral, prefixed with 负 for negative input
        :raises ValueError: if ``inputs`` is ``None`` or empty, ``mode`` is not
            supported, the input contains characters other than digits, ``.``
            and ``-``, or it contains more than one decimal point
        """
        if inputs is None or inputs == "":
            raise ValueError("输入数据为空!")

        if mode not in self.mode_list:
            raise ValueError(f"mode 仅支持 {str(self.mode_list)} !")

        # Numbers are stringified first; note Python itself normalises
        # literals before we see them: 1. -> 1.0, 1.00 -> 1.0, -0 -> 0.
        if not isinstance(inputs, str):
            inputs = self.__number_to_string(inputs)

        # Pre-processing pipelines requested from ``proces.preprocess``:
        #   1. traditional -> simplified Chinese
        #   2. full-width  -> half-width characters
        inputs = preprocess(inputs, pipelines=[
            "traditional_to_simplified",
            "full_angle_to_half_angle"
        ])

        # Reject anything that is not a digit, "." or "-".
        self.__check_inputs_is_valid(inputs)

        # Split off the sign.
        if inputs[0] == "-":
            sign = "负"
            inputs = inputs[1:]
        else:
            sign = ""

        if mode == "direct":
            output = self.__direct_convert(inputs)
        else:
            # Separate the integer part from the decimal part.
            split_result = inputs.split(".")
            len_split_result = len(split_result)
            if len_split_result == 1:
                # Input without a decimal point.
                integer_data = split_result[0]
                if mode == "rmb":
                    output = self.__integer_convert(integer_data, "up") + "元整"
                else:
                    output = self.__integer_convert(integer_data, mode)
            elif len_split_result == 2:
                # Input with a decimal point.
                integer_data, decimal_data = split_result
                if mode == "rmb":
                    int_data = self.__integer_convert(integer_data, "up")
                    # dec_data is "" or "点" followed by one character per
                    # decimal digit, so index 1 is 角 and index 2 is 分.
                    dec_data = self.__decimal_convert(decimal_data, "up")
                    len_dec_data = len(dec_data)

                    if len_dec_data == 0:
                        output = int_data + "元整"
                    elif len_dec_data == 1:
                        raise ValueError(f"异常输出:{dec_data}")
                    elif len_dec_data == 2:
                        if dec_data[1] != "零":
                            if int_data == "零":
                                output = dec_data[1] + "角"
                            else:
                                output = int_data + "元" + dec_data[1] + "角"
                        else:
                            output = int_data + "元整"
                    else:
                        # Only 角 and 分 are rendered; further digits are dropped.
                        if dec_data[1] != "零":
                            if dec_data[2] != "零":
                                if int_data == "零":
                                    output = dec_data[1] + "角" + dec_data[2] + "分"
                                else:
                                    output = int_data + "元" + dec_data[1] + "角" + dec_data[2] + "分"
                            else:
                                if int_data == "零":
                                    output = dec_data[1] + "角"
                                else:
                                    output = int_data + "元" + dec_data[1] + "角"
                        else:
                            if dec_data[2] != "零":
                                if int_data == "零":
                                    output = dec_data[2] + "分"
                                else:
                                    output = int_data + "元" + "零" + dec_data[2] + "分"
                            else:
                                output = int_data + "元整"
                else:
                    output = self.__integer_convert(integer_data, mode) + self.__decimal_convert(decimal_data, mode)
            else:
                raise ValueError(f"输入格式错误:{inputs}!")

        return sign + output

    def __direct_convert(self, inputs: str) -> str:
        """Transliterate ``inputs`` character by character ("." becomes 点)."""
        _output = ""
        for d in inputs:
            if d == ".":
                _output += "点"
            else:
                _output += self.number_low[int(d)]
        return _output

    @staticmethod
    def __number_to_string(number_data: Union[int, float]) -> str:
        """Return ``number_data`` as a plain positional decimal string.

        ``str()`` switches to scientific notation for very small or very large
        floats (``str(0.00005) == "5e-05"``).  Such strings are expanded to
        fixed-point notation; every other value is returned as ``str()``
        produced it.
        """
        string_data = str(number_data)
        if "e" in string_data:
            # Decimal parses the repr exactly and the "f" format never uses an
            # exponent, so mantissas with a fractional part or a sign
            # (1.5e-05, -5e-05, 1.5e+16) expand correctly as well.
            from decimal import Decimal

            string_data = format(Decimal(string_data), "f")
        return string_data

    def __check_inputs_is_valid(self, check_data: str) -> None:
        """Raise ``ValueError`` if any character is not a digit, "." or "-"."""
        all_check_keys = self.all_num + ".-"
        for data in check_data:
            if data not in all_check_keys:
                raise ValueError(f"输入的数据不在转化范围内:{data}!")

    def __integer_convert(self, integer_data: str, mode: str) -> str:
        """Convert the integer part to Chinese numerals.

        :param integer_data: digits of the integer part
        :param mode: ``low`` or ``up``
        :raises ValueError: for any other mode, or when the number has more
            digits than the unit table has entries
        """
        if mode == "low":
            numeral_list = NUMBER_LOW_AN2CN
            unit_list = UNIT_LOW_ORDER_AN2CN
        elif mode == "up":
            numeral_list = NUMBER_UP_AN2CN
            unit_list = UNIT_UP_ORDER_AN2CN
        else:
            raise ValueError(f"error mode: {mode}")

        # Drop leading zeros, e.g. 007 => 7.
        integer_data = str(int(integer_data))

        len_integer_data = len(integer_data)
        if len_integer_data > len(unit_list):
            raise ValueError(f"超出数据范围,最长支持 {len(unit_list)} 位")

        output_an = ""
        for i, d in enumerate(integer_data):
            if int(d):
                output_an += numeral_list[int(d)] + unit_list[len_integer_data - i - 1]
            else:
                # A zero on a 4-digit group boundary still emits its unit
                # (万 / 亿); the surplus 零 is cleaned up below.
                if not (len_integer_data - i - 1) % 4:
                    output_an += numeral_list[int(d)] + unit_list[len_integer_data - i - 1]

                # Emit at most one 零 for a run of zeros.
                if i > 0 and not output_an[-1] == "零":
                    output_an += numeral_list[int(d)]

        output_an = output_an.replace("零零", "零").replace("零万", "万").replace("零亿", "亿").replace("亿万", "亿") \
            .strip("零")

        # Drop the leading 一 of a leading 一十.
        if output_an[:2] in ["一十"]:
            output_an = output_an[1:]

        # Everything was stripped: the integer part is zero.
        if not output_an:
            output_an = "零"

        return output_an

    def __decimal_convert(self, decimal_data: str, o_mode: str) -> str:
        """Convert the decimal part to 点 followed by one numeral per digit.

        An empty ``decimal_data`` yields ``""``.  Only the first 16 digits are
        converted; a warning is issued when more are supplied.

        :raises ValueError: if ``o_mode`` is neither ``low`` nor ``up``
        """
        len_decimal_data = len(decimal_data)

        if len_decimal_data > 16:
            warn(f"注意:小数部分长度为 {len_decimal_data} ,将自动截取前 16 位有效精度!")
            decimal_data = decimal_data[:16]

        if len_decimal_data:
            output_an = "点"
        else:
            output_an = ""

        if o_mode == "low":
            numeral_list = NUMBER_LOW_AN2CN
        elif o_mode == "up":
            numeral_list = NUMBER_UP_AN2CN
        else:
            raise ValueError(f"error mode: {o_mode}")

        for data in decimal_data:
            output_an += numeral_list[int(data)]
        return output_an
@@ -0,0 +1,71 @@ |
||
1 |
+import unittest |
|
2 |
+ |
|
3 |
+from .an2cn import An2Cn |
|
4 |
+ |
|
5 |
+ |
|
6 |
class An2CnTest(unittest.TestCase):
    """Checks ``An2Cn.an2cn`` in every output mode and on invalid input."""

    def setUp(self) -> None:
        # Each value lists the expected output as [low, up, rmb, direct].
        # The literal ``-0`` is the int 0 in Python, so it is covered by the
        # ``0`` entry rather than being repeated as a duplicate key.
        self.input_data = {
            0: ["零", "零", "零元整", "零"],
            1: ["一", "壹", "壹元整", "一"],
            11: ["十一", "壹拾壹", "壹拾壹元整", "一一"],
            1000000: ["一百万", "壹佰万", "壹佰万元整", "一零零零零零零"],
            1000054: ["一百万零五十四", "壹佰万零伍拾肆", "壹佰万零伍拾肆元整", "一零零零零五四"],
            31000054: ["三千一百万零五十四", "叁仟壹佰万零伍拾肆", "叁仟壹佰万零伍拾肆元整", "三一零零零零五四"],
            9876543298765432: [
                "九千八百七十六万五千四百三十二亿九千八百七十六万五千四百三十二",
                "玖仟捌佰柒拾陆万伍仟肆佰叁拾贰亿玖仟捌佰柒拾陆万伍仟肆佰叁拾贰",
                "玖仟捌佰柒拾陆万伍仟肆佰叁拾贰亿玖仟捌佰柒拾陆万伍仟肆佰叁拾贰元整",
                "九八七六五四三二九八七六五四三二"
            ],
            10000000000000: ["十万亿", "壹拾万亿", "壹拾万亿元整", "一零零零零零零零零零零零零零"],
            -1: ["负一", "负壹", "负壹元整", "负一"],
            -11: ["负十一", "负壹拾壹", "负壹拾壹元整", "负一一"],
            0.000500050005005: [
                "零点零零零五零零零五零零零五零零五",
                "零点零零零伍零零零伍零零零伍零零伍",
                "零元整",
                "零点零零零五零零零五零零零五零零五"
            ],
            0.00005: ["零点零零零零五", "零点零零零零伍", "零元整", "零点零零零零五"],
            0.4321: ["零点四三二一", "零点肆叁贰壹", "肆角叁分", "零点四三二一"],
            1000054.4321: [
                "一百万零五十四点四三二一",
                "壹佰万零伍拾肆点肆叁贰壹",
                "壹佰万零伍拾肆元肆角叁分",
                "一零零零零五四点四三二一"
            ],
            1.01: ["一点零一", "壹点零壹", "壹元零壹分", "一点零一"],
            1.2: ["一点二", "壹点贰", "壹元贰角", "一点二"],
            0.01: ["零点零一", "零点零壹", "壹分", "零点零一"],
            -0.1: ["负零点一", "负零点壹", "负壹角", "负零点一"],
            1.10: ["一点一", "壹点壹", "壹元壹角", "一点一"],
            12.0: ["十二点零", "壹拾贰点零", "壹拾贰元整", "一二点零"],
            2.0: ["二点零", "贰点零", "贰元整", "二点零"],
            0.10: ["零点一", "零点壹", "壹角", "零点一"]
        }

        # Inputs that must be rejected with ValueError.
        self.error_input_data = [
            "123.1.1",
            "0.1零"
        ]

        self.ac = An2Cn()

    def test_an2cn(self) -> None:
        for item in self.input_data.keys():
            self.assertEqual(self.ac.an2cn(item), self.input_data[item][0])
            self.assertEqual(self.ac.an2cn(item, "low"), self.input_data[item][0])
            self.assertEqual(self.ac.an2cn(item, "up"), self.input_data[item][1])
            self.assertEqual(self.ac.an2cn(item, "rmb"), self.input_data[item][2])
            self.assertEqual(self.ac.an2cn(item, "direct"), self.input_data[item][3])

        # One assertRaises per input: a single context manager around the loop
        # would exit on the first exception and never try the remaining inputs.
        for error_data in self.error_input_data:
            with self.subTest(error_data=error_data):
                with self.assertRaises(ValueError):
                    self.ac.an2cn(error_data)
|
68 |
+ |
|
69 |
+ |
|
70 |
+if __name__ == '__main__': |
|
71 |
+ unittest.main() |
@@ -0,0 +1,294 @@ |
||
1 |
+import re |
|
2 |
+from warnings import warn |
|
3 |
+from typing import Union |
|
4 |
+ |
|
5 |
+from proces import preprocess |
|
6 |
+ |
|
7 |
+from .an2cn import An2Cn |
|
8 |
+from .conf import NUMBER_CN2AN, UNIT_CN2AN, STRICT_CN_NUMBER, NORMAL_CN_NUMBER, NUMBER_LOW_AN2CN, UNIT_LOW_AN2CN |
|
9 |
+ |
|
10 |
+ |
|
11 |
class Cn2An(object):
    """Convert Chinese numerals to Arabic numbers.

    Modes, as listed in ``mode_list``:

    * ``strict`` - only input matching the strict integer/decimal patterns
    * ``normal`` - additionally accepts the extra characters of
      ``NORMAL_CN_NUMBER``, digit-only strings (一二三) and colloquial forms (一万二)
    * ``smart``  - additionally accepts embedded Arabic digits (100万)
    """

    def __init__(self) -> None:
        self.all_num = "".join(list(NUMBER_CN2AN.keys()))
        self.all_unit = "".join(list(UNIT_CN2AN.keys()))
        self.strict_cn_number = STRICT_CN_NUMBER
        self.normal_cn_number = NORMAL_CN_NUMBER
        # Characters allowed in the input for each mode.
        self.check_key_dict = {
            "strict": "".join(self.strict_cn_number.values()) + "点负",
            "normal": "".join(self.normal_cn_number.values()) + "点负",
            "smart": "".join(self.normal_cn_number.values()) + "点负" + "01234567890.-"
        }
        self.pattern_dict = self.__get_pattern()
        self.ac = An2Cn()
        self.mode_list = ["strict", "normal", "smart"]
        # Currency wording: ...元/圆 <digit>角 [<digit>分]
        self.yjf_pattern = re.compile(fr"^.*?[元圆][{self.all_num}]角([{self.all_num}]分)?$")
        # Arabic number with an optional single trailing unit, e.g. 10.1万
        self.pattern1 = re.compile(fr"^-?\d+(\.\d+)?[{self.all_unit}]?$")
        # Digit characters only, e.g. 一二三
        self.ptn_all_num = re.compile(f"^[{self.all_num}]+$")
        # Colloquial form: (0-2 digit chars + unit) repeated, then one digit.
        # Allowing zero digits before a unit covers inputs such as "十一万三".
        self.ptn_speaking_mode = re.compile(f"^([{self.all_num}]{{0,2}}[{self.all_unit}])+[{self.all_num}]$")

    def cn2an(self, inputs: Union[str, int, float] = None, mode: str = "strict") -> Union[float, int]:
        """Convert a Chinese numeral to an Arabic number.

        :param inputs: Chinese numerals, Arabic numerals, or a mix of both
        :param mode: one of ``strict``, ``normal``, ``smart``
        :return: the numeric value
        :raises ValueError: if ``inputs`` is ``None`` or empty, ``mode`` is not
            supported, or the input does not pass validation for ``mode``
        """
        # Same emptiness rule as An2Cn.an2cn: both None and "" are rejected
        # here instead of failing later with an IndexError.
        if inputs is None or inputs == "":
            raise ValueError("输入数据为空!")

        if mode not in self.mode_list:
            raise ValueError(f"mode 仅支持 {str(self.mode_list)} !")

        # Numbers are stringified first.
        if not isinstance(inputs, str):
            inputs = str(inputs)

        # Pre-processing pipelines requested from ``proces.preprocess``:
        #   1. traditional -> simplified Chinese
        #   2. full-width  -> half-width characters
        inputs = preprocess(inputs, pipelines=[
            "traditional_to_simplified",
            "full_angle_to_half_angle"
        ])

        # Special character 廿 (twenty).
        inputs = inputs.replace("廿", "二十")

        # Validate and split the input.
        sign, integer_data, decimal_data, is_all_num = self.__check_input_data_is_valid(inputs, mode)

        # sign == 0 marks the smart-mode shortcut: integer_data already holds
        # the final numeric value.
        if sign == 0:
            return integer_data

        if not is_all_num:
            if decimal_data is None:
                output = self.__integer_convert(integer_data)
            else:
                output = self.__integer_convert(integer_data) + self.__decimal_convert(decimal_data)
                # fix 1 + 0.57 = 1.5699999999999998
                output = round(output, len(decimal_data))
        else:
            if decimal_data is None:
                output = self.__direct_convert(integer_data)
            else:
                output = self.__direct_convert(integer_data) + self.__decimal_convert(decimal_data)
                # fix 1 + 0.57 = 1.5699999999999998
                output = round(output, len(decimal_data))

        return sign * output

    def __get_pattern(self) -> dict:
        """Build the compiled integer/decimal validation regexes per mode.

        The patterns are written with the canonical lowercase characters and
        each character is then replaced by the alternatives listed for it in
        ``strict_cn_number`` / ``normal_cn_number``.
        """
        # Strict integer grammar, built up range by range.
        _0 = "[零]"
        _1_9 = "[一二三四五六七八九]"
        _10_99 = f"{_1_9}?[十]{_1_9}?"
        _1_99 = f"({_10_99}|{_1_9})"
        _100_999 = f"({_1_9}[百]([零]{_1_9})?|{_1_9}[百]{_10_99})"
        _1_999 = f"({_100_999}|{_1_99})"
        _1000_9999 = f"({_1_9}[千]([零]{_1_99})?|{_1_9}[千]{_100_999})"
        _1_9999 = f"({_1000_9999}|{_1_999})"
        _10000_99999999 = f"({_1_9999}[万]([零]{_1_999})?|{_1_9999}[万]{_1000_9999})"
        _1_99999999 = f"({_10000_99999999}|{_1_9999})"
        _100000000_9999999999999999 = f"({_1_99999999}[亿]([零]{_1_99999999})?|{_1_99999999}[亿]{_10000_99999999})"
        _1_9999999999999999 = f"({_100000000_9999999999999999}|{_1_99999999})"
        str_int_pattern = f"^({_0}|{_1_9999999999999999})$"
        nor_int_pattern = f"^({_0}|{_1_9999999999999999})$"

        # Strict decimals must end in a non-zero digit; normal ones need not.
        str_dec_pattern = "^[零一二三四五六七八九]{0,15}[一二三四五六七八九]$"
        nor_dec_pattern = "^[零一二三四五六七八九]{0,16}$"

        for str_num in self.strict_cn_number.keys():
            str_int_pattern = str_int_pattern.replace(str_num, self.strict_cn_number[str_num])
            str_dec_pattern = str_dec_pattern.replace(str_num, self.strict_cn_number[str_num])
        for nor_num in self.normal_cn_number.keys():
            nor_int_pattern = nor_int_pattern.replace(nor_num, self.normal_cn_number[nor_num])
            nor_dec_pattern = nor_dec_pattern.replace(nor_num, self.normal_cn_number[nor_num])

        pattern_dict = {
            "strict": {
                "int": re.compile(str_int_pattern),
                "dec": re.compile(str_dec_pattern)
            },
            "normal": {
                "int": re.compile(nor_int_pattern),
                "dec": re.compile(nor_dec_pattern)
            }
        }
        return pattern_dict

    def __copy_num(self, num):
        """Transliterate a string of Arabic digits to lowercase Chinese digits."""
        cn_num = ""
        for n in num:
            cn_num += NUMBER_LOW_AN2CN[int(n)]
        return cn_num

    def __check_input_data_is_valid(self, check_data: str, mode: str) -> (int, str, str, bool):
        """Validate ``check_data`` for ``mode`` and split it into its parts.

        :return: ``(sign, integer_data, decimal_data, is_all_num)``.  ``sign``
            is 1 or -1; ``decimal_data`` is ``None`` when there is no 点;
            ``is_all_num`` is True for digit-only input (一二三).  In smart mode
            an Arabic number with an optional unit is evaluated immediately
            and returned as ``(0, value, None, None)``.
        :raises ValueError: if the data is empty, contains characters outside
            the mode's character set, has more than one 点, or matches none of
            the accepted formats
        """
        # Strip a trailing 元整 / 圆整 / 元正 / 圆正.
        stop_words = ["元整", "圆整", "元正", "圆正"]
        for word in stop_words:
            if check_data[-2:] == word:
                check_data = check_data[:-2]

        # Strip a trailing 元 / 圆 (not allowed in strict mode).
        if mode != "strict":
            normal_stop_words = ["圆", "元"]
            for word in normal_stop_words:
                # Slice instead of index so an emptied string cannot raise.
                if check_data[-1:] == word:
                    check_data = check_data[:-1]

        # Nothing left once the currency words are gone (e.g. "元整").
        if not check_data:
            raise ValueError("输入数据为空!")

        # Currency wording: rewrite 元/圆 ... 角 ... 分 as a decimal number.
        # Both 元 and 圆 are replaced, matching what yjf_pattern accepts.
        result = self.yjf_pattern.search(check_data)
        if result:
            check_data = check_data.replace("元", "点").replace("圆", "点").replace("角", "").replace("分", "")

        # Special phrasings: 一千零十一, 一万零百一十一
        if "零十" in check_data:
            check_data = check_data.replace("零十", "零一十")
        if "零百" in check_data:
            check_data = check_data.replace("零百", "零一百")

        for data in check_data:
            if data not in self.check_key_dict[mode]:
                raise ValueError(f"当前为{mode}模式,输入的数据不在转化范围内:{data}!")

        # Determine the sign.
        if check_data[0] == "负":
            check_data = check_data[1:]
            sign = -1
        else:
            sign = 1

        if "点" in check_data:
            split_data = check_data.split("点")
            if len(split_data) == 2:
                integer_data, decimal_data = split_data
                # Smart mode: turn embedded Arabic digits into Chinese numerals.
                if mode == "smart":
                    integer_data = re.sub(r"\d+", lambda x: self.ac.an2cn(x.group()), integer_data)
                    decimal_data = re.sub(r"\d+", lambda x: self.__copy_num(x.group()), decimal_data)
                    mode = "normal"
            else:
                raise ValueError("数据中包含不止一个点!")
        else:
            integer_data = check_data
            decimal_data = None
            # Smart mode: turn embedded Arabic digits into Chinese numerals.
            if mode == "smart":
                # 10.1万 10.1
                result1 = self.pattern1.search(integer_data)
                if result1:
                    if result1.group() == integer_data:
                        if integer_data[-1] in UNIT_CN2AN.keys():
                            output = int(float(integer_data[:-1]) * UNIT_CN2AN[integer_data[-1]])
                        else:
                            output = float(integer_data)
                        return 0, output, None, None

                integer_data = re.sub(r"\d+", lambda x: self.ac.an2cn(x.group()), integer_data)
                mode = "normal"

        result_int = self.pattern_dict[mode]["int"].search(integer_data)
        if result_int:
            if result_int.group() == integer_data:
                if decimal_data is not None:
                    result_dec = self.pattern_dict[mode]["dec"].search(decimal_data)
                    if result_dec:
                        if result_dec.group() == decimal_data:
                            return sign, integer_data, decimal_data, False
                else:
                    return sign, integer_data, decimal_data, False
        else:
            if mode == "strict":
                raise ValueError(f"不符合格式的数据:{integer_data}")
            elif mode == "normal":
                # Digit-only form: 一二三
                result_all_num = self.ptn_all_num.search(integer_data)
                if result_all_num:
                    if result_all_num.group() == integer_data:
                        if decimal_data is not None:
                            result_dec = self.pattern_dict[mode]["dec"].search(decimal_data)
                            if result_dec:
                                if result_dec.group() == decimal_data:
                                    return sign, integer_data, decimal_data, True
                        else:
                            return sign, integer_data, decimal_data, True

                # Colloquial form: 一万二, 两千三, 三百四, 十三万六, 一百二十五万三
                result_speaking_mode = self.ptn_speaking_mode.search(integer_data)
                if len(integer_data) >= 3 and result_speaking_mode and result_speaking_mode.group() == integer_data:
                    # len(integer_data)>=3: because the minimum length of integer_data that can be matched is 3
                    # to find the last unit
                    last_unit = result_speaking_mode.groups()[-1][-1]
                    # The trailing digit is implicitly one unit below the last
                    # unit.  十 and 亿 have no entry for "one tenth" in
                    # UNIT_LOW_AN2CN; such input falls through to the
                    # ValueError below instead of raising KeyError.
                    _unit = UNIT_LOW_AN2CN.get(UNIT_CN2AN[last_unit] // 10)
                    if _unit is not None:
                        integer_data = integer_data + _unit
                        if decimal_data is not None:
                            result_dec = self.pattern_dict[mode]["dec"].search(decimal_data)
                            if result_dec:
                                if result_dec.group() == decimal_data:
                                    return sign, integer_data, decimal_data, False
                        else:
                            return sign, integer_data, decimal_data, False

        raise ValueError(f"不符合格式的数据:{check_data}")

    def __integer_convert(self, integer_data: str) -> int:
        """Evaluate a validated Chinese integer, scanning from the last character."""
        output_integer = 0
        # Multiplier applied to the next digit character.
        unit = 1
        # Largest 万/亿-level multiplier currently in effect.
        ten_thousand_unit = 1
        for index, cn_num in enumerate(reversed(integer_data)):
            # Digit
            if cn_num in NUMBER_CN2AN:
                num = NUMBER_CN2AN[cn_num]
                output_integer += num * unit
            # Unit
            elif cn_num in UNIT_CN2AN:
                unit = UNIT_CN2AN[cn_num]
                # Detect 万, 亿 and 万亿
                if unit % 10000 == 0:
                    # 万 亿
                    if unit > ten_thousand_unit:
                        ten_thousand_unit = unit
                    # 万亿
                    else:
                        ten_thousand_unit = unit * ten_thousand_unit
                        unit = ten_thousand_unit

                # 十/百/千 inside a 万/亿 group scale with that group.
                if unit < ten_thousand_unit:
                    unit = unit * ten_thousand_unit

                # A leading unit with no digit before it counts once (十 -> 10).
                if index == len(integer_data) - 1:
                    output_integer += unit
            else:
                raise ValueError(f"{cn_num} 不在转化范围内")

        return int(output_integer)

    def __decimal_convert(self, decimal_data: str) -> float:
        """Evaluate the digits after 点 as a fraction.

        Only the first 16 digits are used; a warning is issued when more are
        supplied.
        """
        len_decimal_data = len(decimal_data)

        if len_decimal_data > 16:
            warn(f"注意:小数部分长度为 {len_decimal_data} ,将自动截取前 16 位有效精度!")
            decimal_data = decimal_data[:16]
            len_decimal_data = 16

        output_decimal = 0
        for index in range(len(decimal_data) - 1, -1, -1):
            unit_key = NUMBER_CN2AN[decimal_data[index]]
            output_decimal += unit_key * 10 ** -(index + 1)

        # Round away floating-point accumulation error.
        output_decimal = round(output_decimal, len_decimal_data)

        return output_decimal

    def __direct_convert(self, data: str) -> int:
        """Evaluate a digit-only string (一二三) positionally."""
        output_data = 0
        for index in range(len(data) - 1, -1, -1):
            unit_key = NUMBER_CN2AN[data[index]]
            output_data += unit_key * 10 ** (len(data) - index - 1)

        return output_data
@@ -0,0 +1,215 @@ |
||
1 |
+import unittest |
|
2 |
+ |
|
3 |
+from .cn2an import Cn2An |
|
4 |
+ |
|
5 |
+ |
|
6 |
class Cn2anTest(unittest.TestCase):
    """Checks ``Cn2An.cn2an`` in strict, normal and smart mode."""

    def setUp(self) -> None:
        # Inputs valid in strict mode (and therefore in every mode).
        self.strict_data_dict = {
            "零": 0,
            "一": 1,
            "十": 10,
            "十一": 11,
            "一十一": 11,
            "二十": 20,
            "二十一": 21,
            "一百": 100,
            "一百零一": 101,
            "一百一十": 110,
            "一百一十一": 111,
            "一千": 1000,
            "一千一百": 1100,
            "一千一百一十": 1110,
            "一千一百一十一": 1111,
            "一千零一十": 1010,
            "一千零十": 1010,
            "一千零十一": 1011,
            "一千零一十一": 1011,
            "一千零一": 1001,
            "一千一百零一": 1101,
            "一万一千一百一十一": 11111,
            "一十一万一千一百一十一": 111111,
            "一百一十一万一千一百一十一": 1111111,
            "一千一百一十一万一千一百一十一": 11111111,
            "一亿一千一百一十一万一千一百一十一": 111111111,
            "一十一亿一千一百一十一万一千一百一十一": 1111111111,
            "一百一十一亿一千一百一十一万一千一百一十一": 11111111111,
            "一千一百一十一亿一千一百一十一万一千一百一十一": 111111111111,
            "一千一百一十一万一千一百一十一亿一千一百一十一万一千一百一十一": 1111111111111111,
            "壹": 1,
            "拾": 10,
            "拾壹": 11,
            "壹拾壹": 11,
            "壹佰壹拾壹": 111,
            "壹仟壹佰壹拾壹": 1111,
            "壹万壹仟壹佰壹拾壹": 11111,
            "壹拾壹万壹仟壹佰壹拾壹": 111111,
            "壹佰壹拾壹万壹仟壹佰壹拾壹": 1111111,
            "壹仟壹佰壹拾壹万壹仟壹佰壹拾壹": 11111111,
            "壹亿壹仟壹佰壹拾壹万壹仟壹佰壹拾壹": 111111111,
            "壹拾壹亿壹仟壹佰壹拾壹万壹仟壹佰壹拾壹": 1111111111,
            "壹佰壹拾壹亿壹仟壹佰壹拾壹万壹仟壹佰壹拾壹": 11111111111,
            "壹仟壹佰壹拾壹亿壹仟壹佰壹拾壹万壹仟壹佰壹拾壹": 111111111111,
            "壹拾壹元整": 11,
            "壹佰壹拾壹圆整": 111,
            "壹拾壹元正": 11,
            "壹拾壹圆正": 11,
            "壹拾壹元壹角": 11.1,
            "壹拾壹元壹角壹分": 11.11,
            "十万": 100000,
            "十万零一": 100001,
            "一万零一": 10001,
            "一万零一十一": 10011,
            "一万零一百一十一": 10111,
            "一万零百一十一": 10111,
            "一十万零一": 100001,
            "一百万零一": 1000001,
            "一千万零一": 10000001,
            "一千零一万一千零一": 10011001,
            "一千零一万零一": 10010001,
            "一亿零一": 100000001,
            "一十亿零一": 1000000001,
            "一百亿零一": 10000000001,
            "一千零一亿一千零一万一千零一": 100110011001,
            "一千亿一千万一千零一": 100010001001,
            "一千亿零一": 100000000001,
            "零点零零零零零零零零零零零零零零一": 0.000000000000001,
            "零点零零零零零零零零零零零零零一": 0.00000000000001,
            "零点零零零零零零零零零零零零一": 0.0000000000001,
            "零点零零零零零零零零零零零一": 0.000000000001,
            "零点零零零零零零零零零零一": 0.00000000001,
            "零点零零零零零零零零零一": 0.0000000001,
            "零点零零零零零零零零一": 0.000000001,
            "零点零零零零零零零一": 0.00000001,
            "零点零零零零零零一": 0.0000001,
            "零点零零零零零一": 0.000001,
            "零点零零零零一": 0.00001,
            "零点零零零一": 0.0001,
            "零点零零一": 0.001,
            "零点零一": 0.01,
            "零点一": 0.1,
            "负一": -1,
            "负二": -2,
            "负十": -10,
            "负十一": -11,
            "负一十一": -11,
            # Archaic form
            "廿二": 22,
        }

        # Inputs that additionally become valid in normal mode.
        self.normal_data_dict = {
            "一一": 11,
            "一一一": 111,
            "壹壹": 11,
            "壹壹壹": 111,
            "零点零": 0,
            "零点零零": 0,
            "一七二零": 1720,
            "一七二零点一": 1720.1,
            "一七二零点一三四": 1720.134,
            "一二三": 123,
            "负零点一零": -0.1,
            "负一七二零": -1720,
            "负一七二零点一": -1720.1,
            # Colloquial
            "三万五": 35000,
            "十三万五": 135000,
            "两千六": 2600,
            "一百二": 120,
            "一百二十万三": 1203000,
            # Traditional Chinese
            "兩千六": 2600,
            # Uppercase
            "壹拾壹元": 11,
            "壹佰壹拾壹圆": 111,
            "壹拾壹圆": 11,
            # Special
            "〇": 0,
        }

        # Inputs that additionally become valid in smart mode.
        self.smart_data_dict = {
            "100万": 1000000,
            "100万三千": 1003000,
            "200亿零四千230": 20000004230,
            "一百点123": 100.123,
            "10.1万": 101000,
            "-10.1万": -101000,
            "35.1亿": 3510000000,
            "10.1": 10.1,
            "-10.1": -10.1,
        }

        self.error_smart_datas = [
            "10.1万零100",
            "10..1万",
        ]

        self.error_normal_datas = [
            "零点",
            "点零",
            "零点点",
            "零点零大",
        ]
        self.error_normal_datas.extend(self.error_smart_datas)
        self.error_normal_datas.extend(list(self.smart_data_dict.keys()))

        self.error_strict_datas = [
            "一一",
            "壹壹",
            "零点",
            "点零",
            "点一",
            "百十一",
            "十一十二",
            "负十一十二",
            "十七十八",
        ]
        self.error_strict_datas.extend(self.error_normal_datas)
        self.error_strict_datas.extend(list(self.normal_data_dict.keys()))

        # Must stay after the error lists are built: the error lists above
        # rely on the dicts NOT yet containing the stricter modes' entries.
        self.normal_data_dict.update(self.strict_data_dict)
        self.smart_data_dict.update(self.normal_data_dict)

        self.ca = Cn2An()

    def _assert_all_rejected(self, error_items) -> None:
        """Assert that every item raises ValueError from ``cn2an``.

        The call uses the default mode, exactly as the three original loops
        did; each item is reported separately via ``subTest``.
        """
        for error_item in error_items:
            with self.subTest(error_item=error_item):
                with self.assertRaises(ValueError):
                    self.ca.cn2an(error_item)

    def test_cn2an(self) -> None:
        for strict_item in self.strict_data_dict.keys():
            self.assertEqual(self.ca.cn2an(strict_item, "strict"),
                             self.strict_data_dict[strict_item])

        for normal_item in self.normal_data_dict.keys():
            self.assertEqual(self.ca.cn2an(normal_item, "normal"),
                             self.normal_data_dict[normal_item])

        for smart_item in self.smart_data_dict.keys():
            self.assertEqual(self.ca.cn2an(smart_item, "smart"),
                             self.smart_data_dict[smart_item])

        # NOTE(review): all three lists are checked in the default (strict)
        # mode, as before - confirm whether the normal/smart lists were meant
        # to be run in their own modes.
        self._assert_all_rejected(self.error_strict_datas)
        self._assert_all_rejected(self.error_normal_datas)
        self._assert_all_rejected(self.error_smart_datas)
|
212 |
+ |
|
213 |
+ |
|
214 |
+if __name__ == '__main__': |
|
215 |
+ unittest.main() |
@@ -0,0 +1,135 @@ |
||
1 |
# Chinese digit character -> numeric value.  Covers the lowercase digits, the
# uppercase (financial) digits and the colloquial variants 〇, 幺, 两 and 仨.
# Every digit character accepted by NORMAL_CN_NUMBER needs an entry here,
# otherwise input that passes validation cannot be evaluated.
NUMBER_CN2AN = {
    "零": 0,
    "〇": 0,
    "一": 1,
    "壹": 1,
    "幺": 1,
    "二": 2,
    "贰": 2,
    "两": 2,
    "三": 3,
    "叁": 3,
    "仨": 3,
    "四": 4,
    "肆": 4,
    "五": 5,
    "伍": 5,
    "六": 6,
    "陆": 6,
    "七": 7,
    "柒": 7,
    "八": 8,
    "捌": 8,
    "九": 9,
    "玖": 9,
}
|
25 |
# Chinese unit character (lowercase and uppercase forms) -> multiplier.
UNIT_CN2AN = {
    "十": 10,
    "拾": 10,
    "百": 100,
    "佰": 100,
    "千": 1000,
    "仟": 1000,
    "万": 10000,
    "亿": 100000000,
}
# Multiplier -> lowercase Chinese unit character.
UNIT_LOW_AN2CN = {
    10: "十",
    100: "百",
    1000: "千",
    10000: "万",
    100000000: "亿",
}
# Digit value -> lowercase Chinese digit.
NUMBER_LOW_AN2CN = {
    0: "零",
    1: "一",
    2: "二",
    3: "三",
    4: "四",
    5: "五",
    6: "六",
    7: "七",
    8: "八",
    9: "九",
}
# Digit value -> uppercase (financial) Chinese digit.
NUMBER_UP_AN2CN = {
    0: "零",
    1: "壹",
    2: "贰",
    3: "叁",
    4: "肆",
    5: "伍",
    6: "陆",
    7: "柒",
    8: "捌",
    9: "玖",
}
# Lowercase unit for each digit position, index 0 being the ones place.
# The 16 entries bound the longest integer An2Cn can convert.
UNIT_LOW_ORDER_AN2CN = [
    "",
    "十",
    "百",
    "千",
    "万",
    "十",
    "百",
    "千",
    "亿",
    "十",
    "百",
    "千",
    "万",
    "十",
    "百",
    "千",
]
# Uppercase counterpart of UNIT_LOW_ORDER_AN2CN.
UNIT_UP_ORDER_AN2CN = [
    "",
    "拾",
    "佰",
    "仟",
    "万",
    "拾",
    "佰",
    "仟",
    "亿",
    "拾",
    "佰",
    "仟",
    "万",
    "拾",
    "佰",
    "仟",
]
# Canonical character -> all characters accepted in its place in strict mode.
# Cn2An substitutes these into its validation regexes.
STRICT_CN_NUMBER = {
    "零": "零",
    "一": "一壹",
    "二": "二贰",
    "三": "三叁",
    "四": "四肆",
    "五": "五伍",
    "六": "六陆",
    "七": "七柒",
    "八": "八捌",
    "九": "九玖",
    "十": "十拾",
    "百": "百佰",
    "千": "千仟",
    "万": "万",
    "亿": "亿",
}
# Same as STRICT_CN_NUMBER for normal mode, with the extra variants
# 〇, 幺, 两 and 仨.
NORMAL_CN_NUMBER = {
    "零": "零〇",
    "一": "一壹幺",
    "二": "二贰两",
    "三": "三叁仨",
    "四": "四肆",
    "五": "五伍",
    "六": "六陆",
    "七": "七柒",
    "八": "八捌",
    "九": "九玖",
    "十": "十拾",
    "百": "百佰",
    "千": "千仟",
    "万": "万",
    "亿": "亿",
}
@@ -0,0 +1,29 @@ |
||
1 |
+import torbjorn as tbn |
|
2 |
+ |
|
3 |
+from .an2cn import An2Cn |
|
4 |
+from .cn2an import Cn2An |
|
5 |
+ |
|
6 |
# Converters shared by both benchmark functions; built once at import time.
ac = An2Cn()
ca = Cn2An()

# Benchmark fixture: a 16-digit integer and its lower-case Chinese reading.
# The digits are grouped in fours to line up with the 万 / 亿 units in `cn`.
an = 9876_5432_9876_5432
cn = "九千八百七十六万五千四百三十二亿九千八百七十六万五千四百三十二"
|
11 |
+ |
|
12 |
+ |
|
13 |
@tbn.run_time
def run_cn2an_ten_thousand_times() -> None:
    """Run the Chinese -> Arabic conversion of ``cn`` 10000 times.

    Every result is asserted to equal ``an``. The ``tbn.run_time`` decorator
    presumably reports the elapsed time - confirm against torbjorn.
    """
    for _round in range(10000):
        assert ca.cn2an(cn) == an
|
18 |
+ |
|
19 |
+ |
|
20 |
@tbn.run_time
def run_an2cn_ten_thousand_times() -> None:
    """Run the Arabic -> Chinese conversion of ``an`` 10000 times.

    Every result is asserted to equal ``cn``. The ``tbn.run_time`` decorator
    presumably reports the elapsed time - confirm against torbjorn.
    """
    for _round in range(10000):
        assert ac.an2cn(an) == cn
|
25 |
+ |
|
26 |
+ |
|
27 |
# Script entry point: run the cn2an benchmark first, then the an2cn one.
if __name__ == "__main__":
    run_cn2an_ten_thousand_times()
    run_an2cn_ten_thousand_times()
@@ -0,0 +1,104 @@ |
||
1 |
+import re |
|
2 |
+from warnings import warn |
|
3 |
+ |
|
4 |
+from .cn2an import Cn2An |
|
5 |
+from .an2cn import An2Cn |
|
6 |
+from .conf import UNIT_CN2AN |
|
7 |
+ |
|
8 |
+ |
|
9 |
class Transform(object):
    """Rewrite the numbers found inside a sentence, Chinese <-> Arabic.

    Dates, fractions, percentages and Celsius temperatures are handled first
    because each needs its own output format; the numbers that remain are
    converted in a final pass.
    """

    def __init__(self) -> None:
        self.all_num = "零一二三四五六七八九"
        self.all_unit = "".join(UNIT_CN2AN.keys())
        self.cn2an = Cn2An().cn2an
        self.an2cn = An2Cn().an2cn
        # Chinese number: optional "负", optional integer part ending in "点",
        # then a run of digit/unit characters.
        self.cn_pattern = f"负?([{self.all_num}{self.all_unit}]+点)?[{self.all_num}{self.all_unit}]+"
        # Arabic digits followed by Chinese units, e.g. "2.5亿". The decimal
        # point is escaped so that only a literal "." separates the integer
        # and fractional digits (an unescaped "." would accept any character).
        self.smart_cn_pattern = fr"-?([0-9]+\.)?[0-9]+[{self.all_unit}]+"

    def transform(self, inputs: str, method: str = "cn2an") -> str:
        """Convert every number in a sentence.

        :param inputs: sentence to process
        :param method: "cn2an" (Chinese -> Arabic) or "an2cn" (Arabic -> Chinese)
        :return: the sentence with its numbers converted
        :raises ValueError: if ``method`` is neither "cn2an" nor "an2cn"
        """
        if method == "cn2an":
            # Normalise spellings the number patterns do not cover.
            inputs = inputs.replace("廿", "二十").replace("半", "0.5").replace("两", "2")
            # date
            # Every part of this pattern is optional, so it also produces empty
            # matches; __sub_util hands those back unchanged.
            inputs = re.sub(
                fr"((({self.smart_cn_pattern})|({self.cn_pattern}))年)?([{self.all_num}十]+月)?([{self.all_num}十]+日)?",
                lambda x: self.__sub_util(x.group(), "cn2an", "date"), inputs)
            # fraction
            inputs = re.sub(fr"{self.cn_pattern}分之{self.cn_pattern}",
                            lambda x: self.__sub_util(x.group(), "cn2an", "fraction"), inputs)
            # percent
            inputs = re.sub(fr"百分之{self.cn_pattern}",
                            lambda x: self.__sub_util(x.group(), "cn2an", "percent"), inputs)
            # celsius
            inputs = re.sub(fr"{self.cn_pattern}摄氏度",
                            lambda x: self.__sub_util(x.group(), "cn2an", "celsius"), inputs)
            # number
            output = re.sub(self.cn_pattern,
                            lambda x: self.__sub_util(x.group(), "cn2an", "number"), inputs)

        elif method == "an2cn":
            # date (all parts optional, so empty matches occur here as well)
            inputs = re.sub(r"(\d{2,4}年)?(\d{1,2}月)?(\d{1,2}日)?",
                            lambda x: self.__sub_util(x.group(), "an2cn", "date"), inputs)
            # fraction
            inputs = re.sub(r"\d+/\d+",
                            lambda x: self.__sub_util(x.group(), "an2cn", "fraction"), inputs)
            # percent
            inputs = re.sub(r"-?(\d+\.)?\d+%",
                            lambda x: self.__sub_util(x.group(), "an2cn", "percent"), inputs)
            # celsius
            inputs = re.sub(r"\d+℃",
                            lambda x: self.__sub_util(x.group(), "an2cn", "celsius"), inputs)
            # number
            output = re.sub(r"-?(\d+\.)?\d+",
                            lambda x: self.__sub_util(x.group(), "an2cn", "number"), inputs)
        else:
            raise ValueError(f"error method: {method}, only support 'cn2an' and 'an2cn'!")

        return output

    def __sub_util(self, inputs, method: str = "cn2an", sub_mode: str = "number") -> str:
        """Convert one regex match according to its category.

        :param inputs: the matched text (may be empty for the date patterns)
        :param method: "cn2an" for Chinese -> Arabic; any other value is
            treated as Arabic -> Chinese
        :param sub_mode: "date", "fraction", "percent", "celsius" or "number"
        :return: the converted text; the match is returned unchanged when it
            is empty or when conversion fails (a warning is issued on failure)
        """
        # Empty matches have nothing to convert; return them as-is so the
        # replacement callback always yields a string.
        if not inputs:
            return inputs

        try:
            if method == "cn2an":
                if sub_mode == "date":
                    return re.sub(fr"(({self.smart_cn_pattern})|({self.cn_pattern}))",
                                  lambda x: str(self.cn2an(x.group(), "smart")), inputs)
                elif sub_mode == "fraction":
                    # "百分之X" is a percentage, not a fraction: leave it for
                    # the percent pass.
                    if inputs[0] != "百":
                        frac_result = re.sub(self.cn_pattern,
                                             lambda x: str(self.cn2an(x.group(), "smart")), inputs)
                        # Chinese puts the denominator first: "二分之一" is 1/2.
                        denominator, numerator = frac_result.split("分之")
                        return f"{numerator}/{denominator}"
                    else:
                        return inputs
                elif sub_mode == "percent":
                    return re.sub(f"(?<=百分之){self.cn_pattern}",
                                  lambda x: str(self.cn2an(x.group(), "smart")), inputs).replace("百分之", "") + "%"
                elif sub_mode == "celsius":
                    return re.sub(f"{self.cn_pattern}(?=摄氏度)",
                                  lambda x: str(self.cn2an(x.group(), "smart")), inputs).replace("摄氏度", "℃")
                elif sub_mode == "number":
                    return str(self.cn2an(inputs, "smart"))
                else:
                    raise Exception(f"error sub_mode: {sub_mode} !")
            else:
                if sub_mode == "date":
                    # Years are read digit by digit ("direct"); month and day
                    # are then read as ordinary numbers ("low").
                    inputs = re.sub(r"\d+(?=年)",
                                    lambda x: self.an2cn(x.group(), "direct"), inputs)
                    return re.sub(r"\d+",
                                  lambda x: self.an2cn(x.group(), "low"), inputs)
                elif sub_mode == "fraction":
                    frac_result = re.sub(r"\d+", lambda x: self.an2cn(x.group(), "low"), inputs)
                    numerator, denominator = frac_result.split("/")
                    return f"{denominator}分之{numerator}"
                elif sub_mode == "celsius":
                    # Drop the trailing "℃" before converting.
                    return self.an2cn(inputs[:-1], "low") + "摄氏度"
                elif sub_mode == "percent":
                    # Drop the trailing "%" before converting.
                    return "百分之" + self.an2cn(inputs[:-1], "low")
                elif sub_mode == "number":
                    return self.an2cn(inputs, "low")
                else:
                    raise Exception(f"error sub_mode: {sub_mode} !")
        except Exception as e:
            # Best effort: a fragment that cannot be converted is reported and
            # left in the sentence as it was.
            warn(str(e))
            return inputs
@@ -0,0 +1,40 @@ |
||
1 |
+import unittest |
|
2 |
+ |
|
3 |
+from .transform import Transform |
|
4 |
+ |
|
5 |
+ |
|
6 |
class TransformTest(unittest.TestCase):
    """Sentence-level conversion checks for ``Transform.transform``."""

    def setUp(self) -> None:
        # Arabic sentence -> Chinese sentence; every pair is checked in both
        # directions (an2cn on the key, cn2an on the value).
        self.strict_data_dict = {
            "小王捡了100块钱": "小王捡了一百块钱",
            "用户增长最快的3个城市": "用户增长最快的三个城市",
            "小王的生日是2001年3月4日": "小王的生日是二零零一年三月四日",
            "小王的生日是2012年12月12日": "小王的生日是二零一二年十二月十二日",
            "今天股价上涨了8%": "今天股价上涨了百分之八",
            "第2天股价下降了-3.8%": "第二天股价下降了百分之负三点八",
            "抛出去的硬币为正面的概率是1/2": "抛出去的硬币为正面的概率是二分之一",
            "现在室内温度为39℃,很热啊!": "现在室内温度为三十九摄氏度,很热啊!",
            "创业板指9月9日早盘低开1.57%": "创业板指九月九日早盘低开百分之一点五七"
        }

        # Input sentence -> expected output; checked in the cn2an direction only.
        self.smart_data_dict = {
            "约2.5亿年~6500万年": "约250000000年~65000000年",
            "廿二日,日出东方": "22日,日出东方",
            "大陆": "大陆",
            "半斤": "0.5斤",
            "两个": "2个",
        }

        self.t = Transform()

    def test_transform(self) -> None:
        for arabic_text, chinese_text in self.strict_data_dict.items():
            self.assertEqual(self.t.transform(arabic_text, "an2cn"), chinese_text)
            self.assertEqual(self.t.transform(chinese_text, "cn2an"), arabic_text)

        for source_text, expected_text in self.smart_data_dict.items():
            self.assertEqual(self.t.transform(source_text, "cn2an"), expected_text)
|
37 |
+ |
|
38 |
+ |
|
39 |
# Run the test case above when this module is executed directly.
if __name__ == "__main__":
    unittest.main()