教育数据知识图谱创建
教育知识图谱的创建:利用 MOOPer 数据集将数据导入 Neo4j 数据库
知识图谱
创建代码
# Root directory of the local MOOPer dataset checkout.
base_url = 'F:\\dev\\flask\\f050-web-edu\\mooper-scripts\\MOOPer'


def delete_all():
    """Delete every node in the graph together with its relationships."""
    graph.run("MATCH(n) DETACH DELETE(n)")


def remove_illegal_chars(text):
    """Strip every character that is neither a word character nor whitespace.

    Args:
        text: Value read from a CSV cell. Only ``str`` values are processed.

    Returns:
        The string with punctuation/symbol characters removed, or ``text``
        unchanged when it is not a string (for example a NaN float produced
        by pandas for an empty cell).
    """
    if isinstance(text, str):
        # [^\w\s] matches anything that is not a letter, digit, underscore
        # or whitespace; every such character is dropped.
        return re.sub(r'[^\w\s]', '', text)
    return text


def _import_entity(node, filename, columns, cleaned=()):
    """MERGE one node per row of an entity CSV file.

    Args:
        node: Node label to create, e.g. ``'course'``.
        filename: CSV file name inside ``knowledgeGraph\\entity``.
        columns: CSV columns copied onto the node as properties.
        cleaned: Subset of ``columns`` passed through
            ``remove_illegal_chars`` before being stored.
    """
    data = pd.read_csv(f'{base_url}\\knowledgeGraph\\entity\\{filename}')

    # Labels and property names cannot be query parameters in Cypher, so
    # only these fixed identifiers are interpolated. The values themselves
    # travel as parameters, which keeps quotes and backslashes in the data
    # from breaking (or injecting into) the query.
    props = ', '.join(f'{col}: ${col}' for col in columns)
    cql = f'MERGE (a:{node} {{{props}}})'

    for _, row in data.iterrows():
        print(row['name'])
        params = {}
        for col in columns:
            value = row[col]
            if col in cleaned:
                value = remove_illegal_chars(value)
            # Every property is stored as text, so that MERGE keeps
            # matching nodes written by earlier runs of this script.
            params[col] = str(value)
        try:
            # NOTE(review): assumes the module-level `graph` accepts a
            # parameter dict as second argument (py2neo Graph.run and the
            # neo4j driver's Session.run both do) - confirm.
            graph.run(cql, params)
        except Exception as e:
            # Best-effort import: report the failing row and carry on.
            print(e)
            print('出错,但是继续')


def import_challenge():
    """Import ``challenge.csv`` as ``challenge`` nodes."""
    _import_entity(
        'challenge',
        'challenge.csv',
        ('name', 'challenge_id', 'task_pass', 'answer', 'score',
         'difficulty', 'praises_count'),
        cleaned=('task_pass', 'answer', 'difficulty'),
    )


def import_chapter():
    """Import ``chapter.csv`` as ``chapter`` nodes."""
    _import_entity(
        'chapter',
        'chapter.csv',
        ('name', 'chapter_id', 'description', 'created_at'),
        cleaned=('description',),
    )


def import_course():
    """Import ``course.csv`` as ``course`` nodes."""
    _import_entity(
        'course',
        'course.csv',
        ('name', 'course_id', 'description', 'created_at', 'visits',
         'learning_notes', 'publish_time'),
        cleaned=('description',),
    )