# 伤寒论查询 (Shanghan Lun lookup script)
import pandas as pd
import os
def main():
    """Import the Shanghan Lun workbook and build the formula table.

    The steps mirror a Stata script: the sheet is read without a header row
    (so columns carry Excel letters and row numbers match the sheet), the raw
    table is written to CSV, and the formula section (sheet row 400 onward)
    is transposed into one row per formula with columns ``_var1``, ``_var2``,
    ... as Stata's ``sxpose`` would name them.

    Returns:
        The prepared formula DataFrame with ``idd`` and ``id`` as the two
        leftmost columns. Earlier versions returned ``None``; callers that
        ignore the return value are unaffected.
    """

    def excel_column_label(position):
        """Return the Excel column label (A..Z, AA..) for a 0-based position."""
        label = ''
        position += 1
        while position:
            position, remainder = divmod(position - 1, 26)
            label = chr(ord('A') + remainder) + label
        return label

    # Root directory used for every relative file name below.
    os.chdir(r'C:\0文件\医宗')

    # Read with header=None: the code below addresses columns by Excel letter
    # ('GD', 'GE') and rows by sheet row number, which only works when row 1
    # is kept as data.
    # NOTE(review): assumes the sheet has at least columns A..GE - confirm.
    df = pd.read_excel(
        r'C:\0文件\医宗\伤寒论-20240731.xlsx',
        sheet_name='条文+方剂+药物',
        dtype=str,
        header=None,
    )
    df.columns = [excel_column_label(n) for n in range(df.shape[1])]

    # Stata's "format %40s" only affects display; the nearest pandas
    # equivalent is a display option (astype('40') is not a valid dtype).
    pd.set_option('display.max_colwidth', 40)

    df = df.drop(columns=['GD', 'GE'])
    # Overwrites any existing file of the same name.
    df.to_csv('1-伤寒论.csv', index=False)

    # ---- formula table ----
    # Keep sheet row 400 through the last row.
    df = df.iloc[399:]
    # Transpose and name the new columns _var1.._varN.
    df = df.transpose()
    df.columns = [f'_var{n}' for n in range(1, df.shape[1] + 1)]
    # Drop the first 52 rows of the transposed table (sheet columns A..AZ).
    df = df.iloc[52:]
    df = df.drop(columns=['_var180'])
    # Drop the 123rd remaining row.
    df = df.drop(df.index[122])
    # Number rows from 1 so the label slices below match Stata's "in a/b".
    df.index = range(1, len(df) + 1)

    pd.set_option('display.max_colwidth', 30)

    df.loc[df['_var1'] == 'aa 原始 ID', '_var1'] = '000'
    # Zero-pad the ids: rows 2-10 get "00", rows 11-100 get "0".
    df.loc[2:10, '_var1'] = '00' + df.loc[2:10, '_var1']
    df.loc[11:100, '_var1'] = '0' + df.loc[11:100, '_var1']
    # First row of _var142.._var179 becomes "000".
    df.loc[1, [f'_var{n}' for n in range(142, 180)]] = '000'

    # Numeric copy of _var1.
    df['id'] = pd.to_numeric(df['_var1'], errors='coerce')
    # idd: entry number taken from _var142 (the first formula's entry number,
    # per the original author's note).
    df['idd'] = df['_var142']
    # Move idd and id to the far left for easier inspection.
    df = df[['idd', 'id'] + [col for col in df.columns if col not in ['idd', 'id']]]
    return df
import pandas as pd
import os
def main(df=None):
    """Write the base/add-on Stata files and merge text onto the formulas.

    Args:
        df: Optional prepared formula table (columns ``idd``, ``id``,
            ``_var1``...). When omitted an empty frame is used, which is what
            the original placeholder did.

    Side effects: writes several ``.dta`` files in the working directory and
    deletes the per-iteration intermediates.
    """
    # Format 118 stores UTF-8 (the default format 114 is latin-1 and cannot
    # hold Chinese text); the index is not written so that re-reading does
    # not accumulate "index" columns.
    stata_opts = {'write_index': False, 'version': 118}

    if df is None:
        # NOTE(review): placeholder - the formula table is expected here.
        df = pd.DataFrame()
    df.to_stata('1-伤寒论-方剂-横药纵方+无方条目-作基座.dta', **stata_opts)
    # Keep only the first 122 rows and save them as the add-on file.
    df = df.iloc[:122]
    df.to_stata('1-伤寒论-方剂-横药纵方-作附加.dta', **stata_opts)

    # ---- text table ----
    # NOTE(review): nothing visible writes '1-伤寒论.dta' (the import step
    # writes '1-伤寒论.csv') - confirm this file exists.
    df = pd.read_stata('1-伤寒论.dta')
    # Drop rows 400-578 by position, then renumber so that label 399 is the
    # new 400th row.
    # NOTE(review): the original comment said 400-557; the code's range
    # (399, 578) is kept.
    df = df.drop(df.index[399:578]).reset_index(drop=True)
    # Blank row 400 of these five columns.
    # NOTE(review): the original comments named AK..AO; the code uses AV..AZ.
    for col in ['AV', 'AW', 'AX', 'AY', 'AZ']:
        df.loc[399, col] = ''
    # Drop every column whose row-400 value starts with "8*".
    flagged = [col for col in df.columns if str(df.loc[399, col]).startswith('8*')]
    df = df.drop(columns=flagged)
    # Then drop row 400 itself.
    df = df.drop(399)
    # Column C holds the entry number; its header cell becomes "000".
    df.loc[df['C'] == '刘本条目编号', 'C'] = '000'
    df['idd'] = df['C']
    df = df[['idd'] + [col for col in df.columns if col != 'idd']]
    df.to_stata('1-伤寒论-条文-横文纵号-作基座.dta', **stata_opts)
    df = df.drop(columns=['AV', 'AW', 'AX', 'AY', 'AZ'])
    df.to_stata('1-伤寒论-条文-横文纵号-作附加.dta', **stata_opts)

    # ---- merge loop ----
    df = pd.read_stata('1-伤寒论-方剂-横药纵方+无方条目-作基座.dta')
    df.to_stata('1-伤寒论-方剂-横药纵方+无方条目-作基座-142.dta', **stata_opts)

    for i in range(142, 180):
        addon_path = f'1-伤寒论-条文-横文纵号-作附加-{i}.dta'

        addon = pd.read_stata(r"C:\0文件\医宗\1-伤寒论-条文-横文纵号-作附加.dta")
        # Column names must differ on every pass, otherwise the merged
        # columns would collide.
        addon.columns = [f'{col}_{i}' for col in addon.columns]
        addon = addon.rename(columns={f'idd_{i}': 'idd'})
        addon['idd'] = pd.to_numeric(addon['idd'], errors='coerce')
        addon = addon.sort_values(by='idd')
        addon.to_stata(addon_path, **stata_opts)

        # Raw f-string: in a plain f-string "\0" and "\1" are escape
        # sequences and corrupt the path.
        df = pd.read_stata(rf"C:\0文件\医宗\1-伤寒论-方剂-横药纵方+无方条目-作基座-{i}.dta")
        df[f'_var{i}'] = pd.to_numeric(df[f'_var{i}'], errors='coerce')
        # The join key for this pass is the entry number stored in _var{i}.
        df['idd'] = df[f'_var{i}']
        df = df.sort_values(by='idd')
        # Many-to-one join of the text add-on onto the formula rows.
        df = pd.merge(df, pd.read_stata(addon_path), on='idd', how='left')
        # pandas adds no "_merge" column unless indicator=True; tolerate both.
        df = df.drop(columns=['_merge'], errors='ignore')
        # Drop rows with an empty _var4 unless they are the "000" header row.
        df = df.drop(df[(df['_var4'] == '') & (df['_var1'] != '000')].index)

        df.to_stata(f'1-伤寒论-方剂-横药纵方+无方条目-作基座-{i + 1}.dta', **stata_opts)
        os.remove(f'1-伤寒论-方剂-横药纵方+无方条目-作基座-{i}.dta')
        os.remove(addon_path)

    df.to_stata('1-伤寒论-方剂合条文-从左上往右看.dta', **stata_opts)
    os.remove('1-伤寒论-方剂-横药纵方+无方条目-作基座-180.dta')
import pandas as pd
import os
def main():
    """Build the top-to-bottom "formula with text" table and run two lookups.

    Reads the merged wide table, renames columns so they contain no
    underscore, transposes it in two halves, stacks the halves, relabels and
    decorates rows, saves the result, and prints the rows of every formula
    column whose pinyin code matches ``xcht`` and then ``bhjrc``.
    """
    # UTF-8 Stata format (needed for Chinese names and text); no index column.
    stata_opts = {'write_index': False, 'version': 118}
    formula_cols = [f'v{n}' for n in range(1, 132)]

    def transpose_like_sxpose(frame):
        """Transpose to strings with Stata-style column names _var1.._varN."""
        flipped = frame.transpose().reset_index(drop=True)
        flipped.columns = [f'_var{n}' for n in range(1, flipped.shape[1] + 1)]
        # Stata's "format %40s" is display-only, so the data is not padded.
        return flipped.fillna('').astype(str)

    def list_formula(frame, code):
        """Print v0 and each formula column whose 5th row equals *code*."""
        if len(frame) < 5:
            return
        for col in formula_cols:
            # Stata's v[5] is the value in observation 5, not a character.
            if frame[col].iloc[4] != code:
                continue
            shown = frame[col].notna() & ~frame[col].isin(['', '.'])
            print(frame.loc[shown, ['v0', col]])

    # NOTE(review): the original started from an empty placeholder frame; the
    # merged table saved by the preceding step is presumably the intended
    # input - confirm.
    df = pd.read_stata('1-伤寒论-方剂合条文-从左上往右看.dta')

    # tostring _var142-_var179
    for col in [f'_var{n}' for n in range(142, 180)]:
        df[col] = df[col].astype(str)

    # Lesson from the original author: a dataset to be transposed must not
    # have "_" in its variable names, so "_var" -> "尾接" and "_" -> "连接".
    df = df.rename(columns=lambda name: name.replace('_var', '尾接'))
    df = df.rename(columns=lambda name: name.replace('_', '连接'))

    df = df.sort_values(by='id')
    df['id'] = df['id'].astype(str)
    df['idd'] = df['idd'].astype(str)
    # Reorder only: the listed columns go first, all others are kept.
    leading = ['id', 'idd'] + [f'尾接{n}' for n in [1, 2, 3, 130, 131, 132]]
    df = df[leading + [col for col in df.columns if col not in leading]]
    df.to_stata('1-伤寒论-方剂合条文-从左往右看.dta', **stata_opts)

    # Transpose 1: drop the column range A连接158 .. AQ连接179 (a Stata
    # varlist range, i.e. every column between the two, inclusive).
    df = pd.read_stata('1-伤寒论-方剂合条文-从左往右看.dta')
    df = df.drop(columns=df.loc[:, 'A连接158':'AQ连接179'].columns)
    transpose_like_sxpose(df).to_stata('1-伤寒论-方剂合条文-从上往下看1.dta', **stata_opts)

    # Transpose 2: drop the column range 尾接1 .. AL连接157.
    df = pd.read_stata('1-伤寒论-方剂合条文-从左往右看.dta')
    df = df.drop(columns=df.loc[:, '尾接1':'AL连接157'].columns)
    transpose_like_sxpose(df).to_stata('1-伤寒论-方剂合条文-从上往下看2.dta', **stata_opts)

    # Stack the second half under the first (DataFrame.append no longer
    # exists in pandas 2).
    df_combined = pd.concat(
        [
            pd.read_stata('1-伤寒论-方剂合条文-从上往下看1.dta'),
            pd.read_stata('1-伤寒论-方剂合条文-从上往下看2.dta'),
        ],
        ignore_index=True,
    )
    df_combined.to_stata('1-伤寒论-方剂合条文-从上往下看.dta', **stata_opts)

    # Drop the first two rows, then rename _var1.._var132 to v0..v131.
    df_combined = df_combined.drop([0, 1]).reset_index(drop=True)
    df_combined = df_combined.rename(columns={f'_var{n}': f'v{n - 1}' for n in range(1, 133)})

    # id = running row number, placed leftmost.
    df_combined['id'] = range(1, len(df_combined) + 1)
    df_combined = df_combined[['id'] + [col for col in df_combined.columns if col != 'id']]

    # Relabel or remove header rows.
    df_combined.loc[df_combined['id'] == 1, 'v0'] = "方剂拼音顺序"
    df_combined = df_combined[df_combined['id'] != 2]
    df_combined.loc[df_combined['id'] == 3, 'v0'] = "方剂出场顺序"
    df_combined.loc[df_combined['id'] == 6, 'v0'] = "拼音首字无重"
    df_combined.loc[df_combined['id'] == 7, 'v0'] = "药物↓↓"
    df_combined.loc[df_combined['id'] == 97, 'v0'] = "将息法 →"
    unwanted = ["正文全", "郭注全", "郝讲全", "倪讲全", "胡解全", "方剂辨析全", "其他注全", "zz将息法", "总ID", "49", "刘本条目-公式", "刘本条目"]
    df_combined = df_combined[~df_combined['v0'].isin(unwanted)]

    df_combined.loc[df_combined['v0'] == '000', 'v0'] = "刘本条目编号"
    df_combined.loc[df_combined['v0'] == '方剂歌', 'v0'] = "方剂歌 →"
    df_combined = df_combined[df_combined['v0'] != '0'].reset_index(drop=True)

    # Decorate each formula column. Positions follow the current row order
    # (label 0 is the first row, label 5 the sixth); the other three rules
    # select by the id column.
    dose_row = df_combined['id'] == 7
    care_row = df_combined['id'] == 97
    row_116 = df_combined['id'] == 116
    for col in formula_cols:
        df_combined[col] = df_combined[col].astype(str)
        df_combined.loc[0, col] = df_combined.loc[0, col] + ":" + df_combined.loc[5, col]
        df_combined.loc[dose_row, col] = "剂量↓↓ 方剂:" + df_combined.loc[dose_row, col]
        df_combined.loc[care_row, col] = "→ → " + df_combined.loc[care_row, col]
        df_combined.loc[row_116, col] = " " + df_combined.loc[row_116, col]

    df_combined.loc[df_combined['v0'] == '正文', 'v0'] = "正文 →"
    # Rows labelled "2".."15": indent non-empty cells, then blank the label.
    for key in range(2, 16):
        labelled = df_combined['v0'] == str(key)
        for col in formula_cols:
            target = labelled & (df_combined[col] != "")
            df_combined.loc[target, col] = " " + df_combined.loc[target, col]
        df_combined.loc[labelled, 'v0'] = ""

    for col in formula_cols:
        # Prefix each text cell with the value of the row directly above it
        # (shift over the whole column, not over the filtered subset).
        above = df_combined[col].shift(1, fill_value='')
        target = (df_combined['v0'] == "正文 →") & (df_combined[col] != "")
        df_combined.loc[target, col] = above[target] + "." + df_combined.loc[target, col]
        # NOTE(review): "方剂歌" was renamed to "方剂歌 →" above, so this
        # rule matches nothing; kept as in the source.
        rhyme = df_combined['v0'] == "方剂歌"
        df_combined.loc[rhyme, col] = " " + df_combined.loc[rhyme, col]

    df_combined.to_stata('1-伤寒论-方剂合条文-从上往下看.dta', **stata_opts)

    # ---- lookups ----
    df = pd.read_stata('1-伤寒论-方剂合条文-从上往下看.dta')
    df = df.sort_values(by='id')
    # Input: pinyin code of a formula (or of a six-channel section for
    # entries without a formula). Output: that column's content.
    list_formula(df, "xcht")
    list_formula(df, "bhjrc")
def main():
    """Build the top-to-bottom "text with formulas" table and run lookups.

    For each of the five formula-reference columns (AV..AZ) the formula
    add-on table is joined onto the text table; the result is filtered,
    counted, transposed, relabelled and saved, then queried by entry number
    and by keyword.
    """
    # UTF-8 Stata format (needed for Chinese names and text); no index column.
    stata_opts = {'write_index': False, 'version': 118}
    # Stata's _merge codes for the pandas indicator labels.
    merge_codes = {'left_only': 1, 'right_only': 2, 'both': 3}
    slots = ['AV', 'AW', 'AX', 'AY', 'AZ']
    work_path = '1-伤寒论-条文-横文纵号-作基座-用.dta'

    def list_formula(frame, code):
        """Print v0 and each formula column whose 5th row equals *code*."""
        if len(frame) < 5:
            return
        for col in [f'v{n}' for n in range(1, 132)]:
            # Stata's v[5] is the value in observation 5, not a character.
            if frame[col].iloc[4] != code:
                continue
            shown = frame[col].notna() & ~frame[col].isin(['', '.'])
            print(frame.loc[shown, ['v0', col]])

    def list_containing(frame, columns, needle):
        """Print v0 and every column in which some cell contains *needle*."""
        for col in columns:
            hits = frame[col].astype(str).str.contains(needle, regex=False)
            if hits.any():
                shown = frame[col].notna() & (frame[col] != '')
                print(frame.loc[shown, ['v0', col]])

    # NOTE(review): the original started from an empty placeholder frame; the
    # loop needs idd and AV..AZ, which the text base table has - confirm this
    # is the intended input.
    df = pd.read_stata('1-伤寒论-条文-横文纵号-作基座.dta')
    df.to_stata(work_path, **stata_opts)

    for v in slots:
        base_path = f'1-伤寒论-条文-横文纵号-作基座-用{v}.dta'
        addon_path = f'1-伤寒论-方剂-横药纵方-作附加-{v}.dta'

        # Join key for this pass: the value of column v.
        df = pd.read_stata(work_path)
        df['idd'] = df[v]
        df.to_stata(base_path, **stata_opts)

        # Prefix every add-on column with v, keyed on <v>_var4.
        addon = pd.read_stata('1-伤寒论-方剂-横药纵方-作附加.dta')
        addon = addon.rename(columns=lambda name: f'{v}{name}')
        addon = addon.rename(columns={f'{v}idd': 'idd'})
        addon['idd'] = addon[f'{v}_var4']
        addon = addon.sort_values(by='idd')
        addon.to_stata(addon_path, **stata_opts)

        # indicator=True creates the _merge column that becomes m_<v>.
        df = pd.merge(
            pd.read_stata(base_path),
            pd.read_stata(addon_path),
            on='idd',
            how='left',
            indicator=True,
        )
        df[f'm_{v}'] = df['_merge'].astype(str).map(merge_codes).astype(int)
        df = df.drop(columns=['_merge'])
        df.to_stata(work_path, **stata_opts)

        os.remove(base_path)
        os.remove(addon_path)

    df.to_stata('1-伤寒论-条文合方剂-从上往下看.dta', **stata_opts)
    os.remove(work_path)

    merge_cols = [f'm_{v}' for v in slots]
    name_cols = [f'{v}_var4' for v in slots]
    # Keep rows with m_AV == 1, any merge code 3, or any formula-name header.
    keep = (
        (df['m_AV'] == 1)
        | (df[merge_cols] == 3).any(axis=1)
        | (df[name_cols] == 'ad方剂名').any(axis=1)
    )
    df = df[keep]
    df = df.sort_values(by=['B', 'C'])

    # fangjishu1: sum of the five merge codes.
    df['fangjishu1'] = df[merge_cols].sum(axis=1)
    df = df.sort_values(by='fangjishu1')
    # fangjishu2: 15/13/11/9/7 map to 5/4/3/2/1, anything else to 0.
    df['fangjishu2'] = df['fangjishu1'].map({15: 5, 13: 4, 11: 3, 9: 2, 7: 1}).fillna(0).astype(int)

    # Diagnostics.
    print(df.loc[df['fangjishu1'] == 6, merge_cols])
    print(df.loc[df['m_AY'] == 2, merge_cols])
    print(df.loc[:, ['fangjishu1', 'C'] + merge_cols])
    print(df.loc[df['fangjishu2'] == 3, ['fangjishu2', 'B', 'C']])

    # No underscores in column names before transposing.
    df = df.rename(columns=lambda name: name.replace('_', '连接'))
    df = df.drop(columns=['m连接AV', 'm连接AW', 'm连接AX', 'm连接AY', 'm连接AZ', 'AVid', 'AWid', 'AXid', 'AYid', 'AZid'])
    df['fangjishu1'] = df['fangjishu1'].astype(str)
    df['fangjishu2'] = df['fangjishu2'].astype(str)

    def is_sheet_letter(name):
        """True for the Excel-letter column names A..Z and AA..AZ."""
        if not (name.isascii() and name.isalpha() and name.isupper()):
            return False
        return len(name) == 1 or (len(name) == 2 and name[0] == 'A')

    # Prefix columns A..AZ (the original only matched single letters).
    df = df.rename(columns={name: f'大国{name}' for name in df.columns if is_sheet_letter(name)})

    # Numeric id from column C, sort, move left, then back to string.
    df['id'] = pd.to_numeric(df['大国C'], errors='coerce')
    df = df.sort_values(by='id')
    df = df[['id'] + [col for col in df.columns if col != 'id']]
    df['id'] = df['id'].astype(str)

    # Transpose; columns become _var1.._varN and rows are numbered from 0.
    # Stata's "format %40s" is display-only, so the data is not padded.
    df_transposed = df.transpose().reset_index(drop=True)
    df_transposed.columns = [f'_var{n}' for n in range(1, df_transposed.shape[1] + 1)]
    df_transposed = df_transposed.fillna('').astype(str)

    df_transposed['_var1'] = (
        df_transposed['_var1']
        + df_transposed['_var400']
        + df_transposed['_var401']
        + df_transposed['_var402']
        + df_transposed['_var403']
        + df_transposed['_var404']
    )
    # NOTE(review): the row positions used from here on (3, 0-2, 1, 40-44,
    # >= 42) are kept exactly as in the source; whether they match the
    # intended rows depends on the column order before transposing - confirm.
    df_transposed = df_transposed.drop(3)
    df_transposed.loc[0, '_var1'] = "刘本条目编号"
    df_transposed.loc[1, '_var1'] = "该条文含方数"
    df_transposed.loc[2, '_var1'] = "五方并值总数"

    df_transposed['id'] = range(len(df_transposed))
    df_transposed = df_transposed[['id'] + [col for col in df_transposed.columns if col != 'id']]

    df_transposed = df_transposed[~df_transposed['_var1'].isin(["正文全", "郭注全", "其他注全", "zz将息法", "倪讲全", "胡解全", "郝讲全", "方剂辨析全", "49", "刘本条目-公式", "刘本条目", "", "", ""])]

    # _var1.._var404 -> v0..v403
    df_transposed = df_transposed.rename(columns={f'_var{n}': f'v{n - 1}' for n in range(1, 405)})

    # Relabel the v0 column.
    for key in range(2, 15):
        df_transposed.loc[df_transposed['v0'] == str(key), 'v0'] = ""
    relabel = {
        '000': "刘本条目编号",
        'ac出场ID调整': "方剂出场顺序",
        'ad方剂名': "药物↓ 方剂→",
        '将息法片1': "将息法 →",
        '正文': "正文 →",
        '拼音首字5字校正': "拼音首字无重",
    }
    for old, new in relabel.items():
        df_transposed.loc[df_transposed['v0'] == old, 'v0'] = new
    df_transposed = df_transposed[df_transposed['v0'] != 'ab出场ID']

    # Row 1 holds a count "1".."5"; append that many names taken from
    # rows 40.. of the same column, comma separated.
    for col in [f'v{n}' for n in range(1, 399)]:
        df_transposed[col] = df_transposed[col].astype(str)
        count = df_transposed.loc[1, col]
        if count in ('1', '2', '3', '4', '5'):
            names = [df_transposed.loc[40 + k, col] for k in range(int(count))]
            df_transposed.loc[1, col] = count + ":" + ",".join(names)
    # Manual overrides for two entries.
    df_transposed.loc[1, 'v29'] = "5:桂枝,甘草干姜,芍药甘草,调胃承气,四逆"
    df_transposed.loc[1, 'v30'] = "4:桂枝加附子,甘草干姜,芍药甘草,大承气"

    df_transposed = df_transposed.drop(df_transposed[df_transposed.index >= 42].index)
    df_transposed.to_stata('1-伤寒论-条文合方剂-从上往下看.dta', **stata_opts)

    # ---- lookups ----
    # By entry number: print that entry's text and formulas.
    df = pd.read_stata('1-伤寒论-条文合方剂-从上往下看.dta')
    x = 96
    print(df.loc[df[f'v{x}'].notna() & (df[f'v{x}'] != ''), ['v0', f'v{x}']])

    # By keyword across all entries.
    df = pd.read_stata('1-伤寒论-条文合方剂-从上往下看.dta')
    list_containing(df, [f'v{n}' for n in range(1, 399)], '齐')

    # By formula pinyin code (or six-channel pinyin for formula-less entries).
    df = pd.read_stata('1-伤寒论-方剂合条文-从上往下看.dta')
    df = df.sort_values(by='id')
    list_formula(df, "dwcqt")

    # By keyword across all formulas.
    df = pd.read_stata('1-伤寒论-方剂合条文-从上往下看.dta')
    list_containing(df, [f'v{n}' for n in range(1, 132)], '后背')
# Run the pipeline only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
# 发布于 2024-12-22 (published 2024-12-22)