[从头读历史] 第281节 始制文字 世界上的语系及语言
Posted mwsister
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了[从头读历史] 第281节 始制文字 世界上的语系及语言相关的知识,希望对你有一定的参考价值。
剧情提要:春秋时期,中华混沌帝国一片兴旺发达的景象,但这在厄尔斯星球上并不是孤例。
那么在那个时期,星球各地的人们用什么语言在交流呢?他们的文字又是怎样的呢?
正剧开始:
星历2016年07月03日 09:36:51, 银河系厄尔斯星球中华帝国江南行省。
[工程师阿伟]正在和[机器小伟]一起研究[始制文字 世界上的语系及语言]。
下面看一下音节:
<span style="font-size:18px;">import struct;
import math;
'''
import os
from os.path import join, getsize
import voice.tian, voice.di, voice.xuan, voice.huang;
'''
#辅音音节
class Voice():
def idleWave(self):
return [b'\\x00']*200;
def consonant(self):
terms = [
'b', 'c', 'ch', 'd', 'f', 'g',
'h', 'j', 'k', 'l', 'm', 'n',
'p', 'q', 'r', 's', 'sh', 't',
'v', 'w', 'x', 'y', 'z'
];
return terms;
def vowel(self):
terms = [
'a', 'ai', 'an', 'ang', 'ao',
'e', 'ei', 'en', 'eng', 'er',
'i', 'ian', 'iang', 'ie', 'in', 'ing', 'iong', 'iu'
'o', 'oi', 'on', 'ong', 'ou',
'u', 'ua', 'uan', 'uang', 'ue', 'uei', 'uen', 'ueng',
'üe', 'ün'
];
return terms;
def pingYin(self):
terms = [#410项
'a', 'ai', 'an', 'ang', 'ao', 'ba', 'bai', 'ban', 'bang', 'bao',
'bei', 'ben', 'beng', 'bi', 'bian', 'biao', 'bie', 'bin', 'bing', 'bo',
'bu', 'ca', 'cai', 'can', 'cang', 'cao', 'ce', 'cen', 'ceng', 'cha',
'chai', 'chan', 'chang', 'chao', 'che', 'chen', 'cheng', 'chi', 'chong', 'chou',
'chu', 'chua', 'chuai', 'chuan', 'chuang', 'chui', 'chun', 'chuo', 'ci', 'cong',
'cou', 'cu', 'cuan', 'cui', 'cun', 'cuo', 'da', 'dai', 'dan', 'dang',
'dao', 'de', 'den', 'dei', 'deng', 'di', 'dia', 'dian', 'diao', 'die',
'ding', 'diu', 'dong', 'dou', 'du', 'duan', 'dui', 'dun', 'duo', 'e',
'ei', 'en', 'eng', 'er', 'fa', 'fan', 'fang', 'fei', 'fen', 'feng',
'fo', 'fou', 'fu', 'ga', 'gai', 'gan', 'gang', 'gao', 'ge', 'gei',
'gen', 'geng', 'gong', 'gou', 'gu', 'gua', 'guai', 'guan', 'guang', 'gui',
'gun', 'guo', 'ha', 'hai', 'han', 'hang', 'hao', 'he', 'hei', 'hen',
'heng', 'hong', 'hou', 'hu', 'hua', 'huai', 'huan', 'huang', 'hui', 'hun',
'huo', 'ji', 'jia', 'jian', 'jiang', 'jiao', 'jie', 'jin', 'jing', 'jiong',
'jiu', 'ju', 'juan', 'jue', 'jun', 'ka', 'kai', 'kan', 'kang', 'kao',
'ke', 'ken', 'keng', 'kong', 'kou', 'ku', 'kua', 'kuai', 'kuan', 'kuang',
'kui', 'kun', 'kuo', 'la', 'lai', 'lan', 'lang', 'lao', 'le', 'lei',
'leng', 'li', 'lia', 'lian', 'liang', 'liao', 'lie', 'lin', 'ling', 'liu',
'long', 'lou', 'lu', 'lü', 'luan', 'lue', 'lüe', 'lun', 'luo', 'm',
'ma', 'mai', 'man', 'mang', 'mao', 'me', 'mei', 'men', 'meng', 'mi',
'mian', 'miao', 'mie', 'min', 'ming', 'miu', 'mo', 'mou', 'mu', 'na',
'nai', 'nan', 'nang', 'nao', 'ne', 'nei', 'nen', 'neng', 'ni', 'nian',
'niang', 'niao', 'nie', 'nin', 'ning', 'niu', 'nong', 'nou', 'nu', 'nü',
'nuan', 'nüe', 'nuo', 'nun', 'o', 'ou', 'pa', 'pai', 'pan', 'pang',
'pao', 'pei', 'pen', 'peng', 'pi', 'pian', 'piao', 'pie', 'pin', 'ping',
'po', 'pou', 'pu', 'qi', 'qia', 'qian', 'qiang', 'qiao', 'qie', 'qin',
'qing', 'qiong', 'qiu', 'qu', 'quan', 'que', 'qun', 'ran', 'rang', 'rao',
're', 'ren', 'reng', 'ri', 'rong', 'rou', 'ru', 'ruan', 'rui', 'run',
'ruo', 'sa', 'sai', 'san', 'sang', 'sao', 'se', 'sen', 'seng', 'sha',
'shai', 'shan', 'shang', 'shao', 'she', 'shei', 'shen', 'sheng', 'shi', 'shou',
'shu', 'shua', 'shuai', 'shuan', 'shuang', 'shui', 'shun', 'shuo', 'si', 'song',
'sou', 'su', 'suan', 'sui', 'sun', 'suo', 'ta', 'tai', 'tan', 'tang',
'tao', 'te', 'teng', 'ti', 'tian', 'tiao', 'tie', 'ting', 'tong', 'tou',
'tu', 'tuan', 'tui', 'tun', 'tuo', 'wa', 'wai', 'wan', 'wang', 'wei',
'wen', 'weng', 'wo', 'wu', 'xi', 'xia', 'xian', 'xiang', 'xiao', 'xie',
'xin', 'xing', 'xiong', 'xiu', 'xu', 'xuan', 'xue', 'xun', 'ya', 'yan',
'yang', 'yao', 'ye', 'yi', 'yin', 'ying', 'yo', 'yong', 'you', 'yu',
'yuan', 'yue', 'yun', 'za', 'zai', 'zan', 'zang', 'zao', 'ze', 'zei',
'zen', 'zeng', 'zha', 'zhai', 'zhan', 'zhang', 'zhao', 'zhe', 'zhei', 'zhen',
'zheng', 'zhi', 'zhong', 'zhou', 'zhu', 'zhua', 'zhuai', 'zhuan', 'zhuang', 'zhui',
'zhun', 'zhuo', 'zi', 'zong', 'zou', 'zu', 'zuan', 'zui', 'zun', 'zuo',
];
return terms;
def combineYinJie(self):
v = self.vowel();
c = self.consonant();
com_ = [];
for i in range(len(v)):
if (v[i][0] == 'i' or v[i][0] == 'u'):
continue;
else:
com_.append(v[i]);
for i in range(len(c)):
for j in range(len(v)):
com_.append(c[i]+v[j]);
return com_;
#从音节的录音中(.wav)处理出音节,把数据写入文件
def yinJieProcess(self, sFile):
fileSize = getFileSize(sFile);
if (fileSize < 44):
return;
c = [];
#每次读取1000个字节
perRead = 100;
with open(sFile, 'rb') as f:
#这是标准.wav的文件头长度
a = f.read(44);
index = 44;
start = 0;
end = 0;
spareCount = 0;
while (index < fileSize):
a = f.read(perRead);
b = list(a);
count = countZero(b[1:-1:2], 64);
if (count > perRead / 2* 17/20): #认为这是空白
index += perRead;
if (start == 0): #前端空白
continue;
else: #后端空白
end = index;
if (end -start < 5000):
continue;
else:
index = fileSize+1;
break;
else:
if (start == 0):
start = index; #正部开始
index += perRead;
validLength = end-start;
f.seek(start, 0);#第二个参数whence = 0表示从开头,=2表示从结尾倒着数
data = list(f.read(validLength));
f.close();
print('有效部分从{0}字节开始,到{1}字节结束,共有{2}个字节。即{3}秒到{4}秒'
.format(start ,end, validLength, round(start/22050, 3), round(end/22050, 3)));
dataArray = [];
for i in range(len(data)):
byte = struct.pack('B', data[i]);
dataArray.append(byte);
print('文件写入开始。请至tone.py查收>>>');
fout = open('tone.py', 'w');
fout.write('tone_X = [');
size_1 = len(dataArray);
for n in range(size_1):
fout.write(str(dataArray[n]));
if (n < size_1-1):
fout.write(', ');
fout.write('];');
fout.write('\\r\\n');
fout.close();
print('文件写入完毕。');
#把十进制数按照小尾字节序切割
def littleEndian(self, number, byte = 4):
result = [0]*byte;
for i in range(byte):
result[i] = number%256;
number//=256;
return result;
#把UltraEdit中的值字串转化为hex序列组
def hexExpr(self, string):
resultString = '';
size = len(string);
for i in range(size):
if (i == 0 ):
resultString += '0x'+string[i];
elif (string[i] == ' '):
resultString += ', 0x';
else:
resultString += string[i];
print(resultString);
###
# @usage 写.wav文件,能把声波数据阵列用二进制写成.wav。
# @author mw
# @date 2016年04月28日 星期四 14:31:34
# @param
# @return
#
###
def writeWav(self, filename = 'soundwave'):
byteArray = [];
dataArray = [];
#样本数据阵列
sampleArray = [];
idleWave = self.idleWave();
sampleArray = self.waveDataGen();
#样本数据点数
N = len(sampleArray);
times = 1;
dataSize = N*times;
fileSize = dataSize+44; #44为格式头部分所用字节数
#RIFF WAVE CHUNK
RIFF_ID = [0x52, 0x49, 0x46, 0x46]; #'RIFF'
RIFF_Size = self.littleEndian(fileSize-8, 4); #文件总字节数减去8
RIFF_Type = [0x57, 0x41,0x56, 0x45, 0x66, 0x6D, 0x74, 0x20]; #'WAVEfat '
#Format Chunk
Format_10_17 = [0x10, 0x00, 0x00, 0x00, 0x01, 0x00, 0x01, 0x00];#过滤4+格式2+声道2=8个字节
Format_18_1B = [0x11, 0x2B, 0x00, 0x00]; #采样频率0x2B11 = 11025
Format_1C_1F = [0x22, 0x56, 0x00, 0x00]; #比持率 = 频率*通道*样本位 = 22050
Format_20_23 = [0x02, 0x00, 0x10, 0x00]; #块对齐 = 通道数* 样本位数 = 1*2 = 2
#Fact Chunk(optional)
#Data Chunk
Data_24_27 = [0x64, 0x61, 0x74, 0x61]; #'DATA'标记
Data_Size = self.littleEndian(fileSize-44, 4); #下面的Data部分的字节数,文件总字节数-44
#RIFF WAVE CHUNK
'''
for i in range(4):
byte = struct.pack('B', RIFF_ID[i]);
byteArray.append(byte);
'''
RIFF = [b'R', b'I', b'F', b'F'];
for i in range(4):
byteArray.append(RIFF[i]);
for i in range(4):
byte = struct.pack('B', RIFF_Size[i]);
byteArray.append(byte);
'''
for i in range(8):
byte = struct.pack('B', RIFF_Type[i]);
byteArray.append(byte);
#Format Chunk
for i in range(8):
byte = struct.pack('B', Format_10_17[i]);
byteArray.append(byte);
for i in range(4):
byte = struct.pack('B', Format_18_1B[i]);
byteArray.append(byte);
for i in range(4):
byte = struct.pack('B', Format_1C_1F[i]);
byteArray.append(byte);
for i in range(4):
byte = struct.pack('B', Format_20_23[i]);
byteArray.append(byte);
#Data Chunk
for i in range(4):
byte = struct.pack('B', Data_24_27[i]);
byteArray.append(byte);
'''
#Format_28_2B是数据块大小,formatHead是它前面的所有部分
formatHead = [b'W', b'A', b'V', b'E', b'f', b'm', b't', b' ', \\
b'\\x10', b'\\x00', b'\\x00', b'\\x00', b'\\x01', b'\\x00', b'\\x01', b'\\x00', \\
b'\\x11', b'+', b'\\x00', b'\\x00', \\
b'"', b'V', b'\\x00', b'\\x00', \\
b'\\x02', b'\\x00', b'\\x10', b'\\x00', \\
b'd', b'a', b't', b'a'];
for i in range(len(formatHead)):
byteArray.append(formatHead[i]);
for i in range(4):
byte = struct.pack('B', Data_Size[i]);
byteArray.append(byte);
#写出到文件
print('文件写入开始。>>>');
s = '';
s = filename+'.wav';
fout= open(s, 'wb');
size = len(byteArray);
for i in range(size):
fout.write(byteArray[i]);
size = len(sampleArray);
sizeIdle = len(idleWave);
for j in range(times):
for i in range(size):
fout.write(sampleArray[i]);
fout.close();
print('文件写入完毕。');
#声音数据的生成
def waveDataGen(self):
tian1 = voice.tian.tone_tian1;
di1 = voice.di.tone_di1;
xuan1 = voice.xuan.tone_xuan1;
huang1 = voice.huang.tone_huang1;
idle = self.idleWave();
data = [];
sArray = [ 'tian1', 'di1', 'xuan1', 'huang1' ,'di1', 'xuan1', 'huang1', 'tian1']*2;
for i in range(len(sArray)):
data += eval(sArray[i]);
data += idle*30;
return data;
def read():
fin = open('../input.txt', 'r');
array = [];
for line in fin.readlines():
if (line[0]!= '拼' and line[0].isalpha()):
array.append(line[:-1]);
#print(len(array));
fin.close();
for i in range(len(array)):
print('\\''+array[i]+'\\', ', end = '');
if (i%10 == 9):
print('');
#print(array);
def getFileSize(sFile):
size = getsize(sFile);
#print('文件{0}的大小是{1}字节'.format(sFile, size));
return size;
def countZero(array, judgeValue):
count = 0;
for i in range(len(array)):
if (abs(array[i]) < judgeValue):
count+=1;
return count;
def tmp():
if (0):
pron = Voice();
terms = pron.combineYinJie();
print('音节数共有{0}个'.format(len(terms)));
read();
'''
index = 2;
pron = Voice();
if (index == 1):
#把录音的音节去除空白部分
pron.yinJieProcess('huang1.wav');
elif (index == 2):
#写声音文件
pron.writeWav();
'''
if __name__ == '__main__':
tmp();
</span>
本节到此结束,欲知后事如何,请看下回分解。
以上是关于[从头读历史] 第281节 始制文字 世界上的语系及语言的主要内容,如果未能解决你的问题,请参考以下文章