Dev metacog/DevCaJournal

word cloud 개발일지

잘 배우고, 잘 익히기 2021. 9. 16. 00:41

ㅇ 핵심 기능: 데이터 읽기, 데이터 클렌징(replace 함수), 워드 클라우드 생성(wordcloud 패키지), 마스크드 워드 클라우드(masked word cloud)

# 파일 쓰기
#
# f = open("test.txt", "w", encoding="utf-8")
# f.write("안녕, 스파르타!\n")
# for i in [1,2,3,4,5]:
#     f.write(f'{i} 번째 좋아요\n')
#
# f.close()


# 파일 읽기
# with open("test.txt", "r", encoding="utf-8") as f:
#     lines = f.readlines()
#     for line in lines:
#         print(line)
#

# # 파일 읽기
# text = ''
# with open("test.txt", "r", encoding="utf-8") as f:
#     lines = f.readlines()
#     for line in lines:
#         text += line
#
# print(text)

# # 이용 가능한 폰트 중 '고딕'만 선별

# import matplotlib.font_manager as fm
#
# for font in fm.fontManager.ttflist:
#     if 'Gothic' in font.name:
#         print(font.name, font.fname)

# 워드 클라우드
# text = ''
# with open("kakaotalk.txt", "r", encoding="utf-8") as f:
#     lines = f.readlines()
#     for line in lines:
#         text += line
#
#
# from wordcloud import WordCloud
#
# wc = WordCloud(font_path='C:/Windows/Fonts/malgunsl.ttf', background_color="white", width=600, height=400)
# wc.generate(text)
# wc.to_file("result.png")

# Cleansing: keep only the message body of each chat line and strip noise.

# Single characters deleted from every message (jamo commonly used for
# laughing/crying in Korean chat).
_NOISE_CHARS = str.maketrans('', '', 'ㅋㅠㅜ')
# Placeholder texts deleted from every message, applied in this order
# (emoticon marker, photo marker, "this message was deleted" notice).
_NOISE_PHRASES = ('이모티콘\n', '사진\n', '삭제된 메시지입니다.')


def _extract_message(line):
    """Return the cleaned message body of one exported chat line.

    A line is treated as a chat message only when it contains ``'] ['``;
    the body is everything after the second ``'] '`` separator
    (presumably the layout is ``[name] [time] message`` -- confirm against
    the actual export file).

    Args:
        line: One raw line from the export, including its trailing newline.

    Returns:
        The message body with noise characters and placeholder texts
        removed, or ``''`` when the line is not a chat message or has no
        body field.
    """
    if '] [' not in line:
        return ''
    # maxsplit=2 keeps the whole body intact even when the message itself
    # contains '] '; an unbounded split would truncate such messages.
    fields = line.split('] ', 2)
    if len(fields) < 3:
        # Header-like line without a body: skip instead of raising IndexError.
        return ''
    message = fields[2].translate(_NOISE_CHARS)
    for phrase in _NOISE_PHRASES:
        message = message.replace(phrase, '')
    return message


with open("kakaotalk.txt", "r", encoding="utf-8") as f:
    lines = f.readlines()

# The first five lines are skipped (presumably the export header -- confirm).
text = ''.join(_extract_message(line) for line in lines[5:])
# print(text)

#
# from wordcloud import WordCloud
#
# wc = WordCloud(font_path='C:/Windows/Fonts/malgunsl.ttf', background_color="white", width=600, height=400)
# wc.generate(text)
# wc.to_file("result.png")



# 마스크드 word cloud
from wordcloud import WordCloud
from PIL import Image
import numpy as np

# Load the mask image and convert it to an array for the `mask` argument.
mask_source = Image.open('cloud.png')
mask = np.array(mask_source)

# Build the word cloud from the cleaned chat text and save it as a PNG.
wc = WordCloud(
    background_color="white",
    font_path='C:/Windows/Fonts/malgunsl.ttf',
    mask=mask,
)
wc.generate(text)
wc.to_file("result_masked.png")