# Interview information processing Python script (面试信息处理Python脚本)

import pandas as pd

# Excel workbook to read (raw string so the Windows backslashes stay literal).
xls_path = r'C:\Users\xxx\Desktop\面经_搜索_socket.xlsx'
# Keyword searched for in each line of the '正文' column; lines are
# lower-cased before the comparison.
key_word = 'socket'
# Text file that receives the tab-indented output.
output_file = r'E:\myfile.txt'
# Number of matched lines grouped under each numbered header line.
# NOTE: despite the name, all groups are written into the single output_file.
items_per_file = 20

# Load the 'datatable' sheet; the processing loop reads its '正文' and '链接' columns.
df = pd.read_excel(xls_path, sheet_name='datatable')
def hasKeyWord(line):
    """Return True if ``line`` contains the module-level ``key_word``.

    The match is case-insensitive: both the line and the keyword are
    lower-cased before the substring test, so a keyword configured with
    capital letters (e.g. 'Socket') still matches.

    Args:
        line: A single line of text (str).

    Returns:
        bool: whether the keyword occurs anywhere in the line.
    """
    return key_word.lower() in line.lower()

# Running total of matched lines written so far.
line_cnt = 0
# Index of the numbered group header currently being filled.
node_idx = 0

# Output layout (tab-indented outline):
#   <group index>
#   \t<matched line>
#   \t\t链接
#   \t\t\t<link of the row the line came from>
with open(output_file, 'w', encoding='utf-8') as f:
    # The first group header is always written, even when nothing matches.
    f.write(str(node_idx) + '\n')
    for body, link in zip(df['正文'], df['链接']):
        # Empty Excel cells are read by pandas as NaN (a float), which has
        # no .split(); skip rows without any text instead of crashing.
        if not isinstance(body, str):
            continue
        # A missing link becomes an empty string; any other non-string
        # value is converted so the concatenation below cannot fail.
        link_text = '' if pd.isna(link) else str(link)
        for line in filter(hasKeyWord, body.split("\n")):
            # Open the next group just before its first item. Writing the
            # header after the last item of a group would leave an empty
            # trailing header when the total is an exact multiple of
            # items_per_file.
            if line_cnt and line_cnt % items_per_file == 0:
                node_idx = line_cnt // items_per_file
                f.write(str(node_idx) + '\n')
            # Drop the line's own leading whitespace so every item sits at
            # exactly one tab of indentation.
            outLine = '\t' + line.lstrip() + '\n\t\t链接\n\t\t\t' + link_text + '\n'
            print(outLine)
            f.write(outLine)
            line_cnt += 1