개발일지/Big Data
[Pandas] 가장 많은 리뷰를 받은 `n`개의 음식점
zzoo-ppaamm
2020. 9. 2. 22:40
json 파일 불러오기
# Read the whole JSON document at `data_path` into `data`.
# A missing file is reported on stdout instead of raising.
try:
    with open(data_path, encoding="utf-8") as fp:
        data = json.load(fp)
except FileNotFoundError:
    print(f"`{data_path}` 가 존재하지 않습니다.")
data.json
[{"id": 0,
"name": "",
"branch": "",
"area": "",
"tel": "",
"address": "",
"latitude": "",
"longitude": "",
"category_list": [{"category": ""}, ...],
"menu_list": [{"menu": "a", "price": 0}, ...],
"bhour_list": [],
"review_cnt": 2,
"review_list": [{
"writer_info": {"id": 389728, "gender": "여", "born_year": "1993"},
"review_info": {"id": 2, "score": 5, "content": "", "reg_time": "1970-01-01 00:00:00"}
}, ...]
}, ...]
json to DataFrame
# Row buffers, one list per table; each element is one row (a list of values).
stores = [] # store (restaurant) table
reviews = [] # review table
users = [] # user (review writer) table
menus = [] # menu table
def import_data(data_path=DATA_FILE):
    """
    Req. 1-1-1: Read the restaurant data file and convert it to pandas DataFrames.

    Args:
        data_path: Path of the UTF-8 encoded JSON file to read.

    Returns:
        A dict with the keys "stores", "reviews", "users" and "menus",
        each mapping to a DataFrame built from the corresponding rows.

    Raises:
        SystemExit: With status 1, after printing a message, when
            `data_path` does not exist.
    """
    try:
        with open(data_path, encoding="utf-8") as f:
            data = json.load(f)
    except FileNotFoundError:
        print(f"`{data_path}` 가 존재하지 않습니다.")
        # The bare `exit()` helper is injected by the `site` module for
        # interactive sessions and is not guaranteed to exist; raising
        # SystemExit directly has the same effect everywhere.
        raise SystemExit(1)

    stores = []  # store (restaurant) table rows
    reviews = []  # review table rows
    users = []  # user (review writer) table rows
    menus = []  # menu table rows

    for d in data:
        # Flatten the category list into a single "|"-separated string.
        categories = [c["category"] for c in d["category_list"]]
        stores.append(
            [
                d["id"],
                d["name"],
                d["branch"],
                d["area"],
                d["tel"],
                d["address"],
                d["latitude"],
                d["longitude"],
                "|".join(categories),
            ]
        )

        for review in d["review_list"]:
            r = review["review_info"]
            u = review["writer_info"]
            reviews.append(
                [r["id"], d["id"], u["id"], r["score"], r["content"], r["reg_time"]]
            )
            # NOTE: one user row is appended per review, so a writer with
            # several reviews appears several times in the users table.
            users.append([u["id"], u["gender"], u["born_year"]])

        for m in d["menu_list"]:
            # len(menus) is the running, file-wide menu index (0, 1, 2, ...).
            menus.append([len(menus), d["id"], m["menu"], m["price"]])

    store_frame = pd.DataFrame(data=stores, columns=store_columns)
    review_frame = pd.DataFrame(data=reviews, columns=review_columns)
    user_frame = pd.DataFrame(data=users, columns=user_columns)
    menu_frame = pd.DataFrame(data=menus, columns=menu_columns)

    return {
        "stores": store_frame,
        "reviews": review_frame,
        "users": user_frame,
        "menus": menu_frame,
    }
DataFrame 저장 & 불러오기
def dump_dataframes(dataframes):
    """Pickle `dataframes` to the path `DUMP_FILE` (defined outside this snippet)."""
    pd.to_pickle(dataframes, DUMP_FILE)
def load_dataframes():
    """Unpickle and return the object stored at `DUMP_FILE` (defined outside this snippet)."""
    # NOTE(review): unpickling can execute arbitrary code; presumably DUMP_FILE
    # is only ever written by dump_dataframes -- confirm it is never untrusted.
    return pd.read_pickle(DUMP_FILE)
DataFrame 정제
def get_most_reviewed_stores(dataframes, n=20):
    """
    Req. 1-2-3: Return the `n` stores with the most reviews, sorted.

    Args:
        dataframes: Mapping with a "stores" DataFrame (must have an "id"
            column) and a "reviews" DataFrame (must have a "store" column
            holding the reviewed store's id).
        n: Maximum number of stores to return.

    Returns:
        A DataFrame with one row per store, ordered by review count
        descending. Stores with equal counts are ordered by ascending store
        id so the result is deterministic. Columns are "store", "count",
        followed by every column of the stores table (left-joined on
        store == id). The index is 0..len-1, i.e. rank minus one.
    """
    stores = dataframes["stores"]
    reviews = dataframes["reviews"]

    # Count reviews per store; the group key becomes the "store" column.
    review_counts = reviews.groupby("store").size().reset_index(name="count")

    # Sort on (count desc, store asc): the secondary key makes the order --
    # and therefore which stores survive the head(n) cut -- fully defined
    # when several stores share the same review count.
    top_counts = review_counts.sort_values(
        ["count", "store"], ascending=[False, True]
    ).head(n)

    # Attach the store details; a left join keeps the ranking order.
    return pd.merge(top_counts, stores, left_on="store", right_on="id", how="left")
출력
def main():
    """Load the pickled DataFrames and print the most-reviewed stores as a ranked list."""
    loaded = load_dataframes()

    # Rule line one character narrower than the terminal.
    width = shutil.get_terminal_size().columns - 1
    separater = width * "-"

    # Print the most-reviewed stores.
    ranking = get_most_reviewed_stores(loaded)

    print("[최다 리뷰 음식점]")
    print(f"{separater}\n")
    for position, row in ranking.iterrows():
        # The row index starts at 0, so rank is index + 1.
        line = ' {}위 : {}({}개)'.format(position + 1, row['store_name'], row['count'])
        print(line)
    print(f"{separater}\n")
# Run the report only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
+Python 출력 format
# Three ways to build the same message (`temperature` and `measure` are defined elsewhere).
print('Water boils at %d degrees %s' % (temperature, measure)) # 1: printf-style `%` operator
print('Water boils at {} degrees {}'.format(temperature, measure)) # 2: str.format()
print(f'Water boils at {temperature} degrees {measure}') # 3: f-string (formatted string literal)