데이터 분석

DataFrame - 객체의 결합 및 sorting

핸들이없는8톤트럭 2022. 8. 15. 15:46
반응형

객체의 결합 및 sorting

import numpy as np
import pandas as pd

# Four small score tables keyed by student_id:
#   data1 / data2 - math scores for two disjoint groups of students
#   data3         - English scores for the same ids as data1
#   data4         - English scores whose ids overlap data1 only on 104-106
data1 = {
    'student_id': [101, 102, 103, 104, 105, 106],
    'math_score': [60, 64, 57, 54, 76, 86],
}
data2 = {
    'student_id': [107, 108, 109],
    'math_score': [66, 45, 76],
}
data3 = {
    'student_id': [101, 102, 103, 104, 105, 106],
    'eng_score': [34, 16, 67, 56, 97, 74],
}
data4 = {
    'student_id': [104, 105, 106, 107, 108, 109],
    'eng_score': [45, 43, 75, 87, 45, 65],
}

# Build each frame and promote student_id to the index in one chained step,
# so the later concat/join calls align rows on the student id.
df1 = pd.DataFrame(data1).set_index('student_id')
df2 = pd.DataFrame(data2).set_index('student_id')
df3 = pd.DataFrame(data3).set_index('student_id')
df4 = pd.DataFrame(data4).set_index('student_id')

# Show the four inputs before combining them.
for frame in (df1, df2, df3, df4):
    print(frame)
# Plan: df1 + df2 are stacked vertically, df1 + df3 are placed side by side,
# and df1 + df4 overlap on only part of their student ids.

# df1 and df2 share the same column and have disjoint ids: stack the rows.
df = pd.concat([df1, df2], axis=0)
print(df)
# df1 and df3 hold different columns for the same ids, so they are aligned on
# the index with axis=1 (axis=0 would stack them and pad the gaps with NaN).
df = pd.concat([df1, df3], axis=1)
print(df)
# join='inner' keeps only the ids that are present in both frames.
df = pd.concat([df1, df3], axis=1, join='inner')
print(df)
# The same side-by-side result via merge, matching rows on the index.
df = pd.merge(df1, df3, left_index=True, right_index=True)
print(df)
# With student_id as a regular column, merge joins on the shared column name.
df = pd.merge(df1.reset_index(), df3.reset_index())
print(df)
# Naming the key explicitly on each side ...
df = pd.merge(df1.reset_index(), df3.reset_index(), left_on='student_id', right_on='student_id')
print(df)
# ... is equivalent to the shorter on= form when both sides use the same name.
df = pd.merge(df1.reset_index(), df3.reset_index(), on='student_id')
print(df)
# Restore student_id as the index after the column-based merge.
df = pd.merge(df1.reset_index(), df3.reset_index(), on='student_id').set_index('student_id')
print(df)

# DataFrame.join aligns on the index; for df1/df3 it matches the index merge above.
print(df1.join(df3))
# df1 covers ids 101-106 and df4 covers 104-109, so `how` decides which ids survive.
print(df1.join(df4, how='outer'))  # union of ids: 101-109
print(df1.join(df4, how='inner'))  # ids in both: 104-106
print(df1.join(df4, how='left'))   # ids of df1: 101-106
print(df1.join(df4, how='right'))  # ids of df4: 104-109

# Sorting a DataFrame
# Three classes (a, b, c) with three students each.
data = {
    'class': [label for label in 'abc' for _ in range(3)],
    'student_id': [1, 2, 3] * 3,
    'math_score': [67, 45, 76, 87, 45, 45, 65, 76, 98],
    'eng_score': [67, 46, 74, 46, 86, 67, 56, 34, 53],
}
df = pd.DataFrame(data)
print(df)
# Ascending by math score (the default direction).
print(df.sort_values('math_score'))
# Descending by math score.
print(df.sort_values('math_score', ascending=False))
# Class in ascending order, then highest math score first within each class.
print(df.sort_values(['class', 'math_score'], ascending=[True, False]))

# Pivot tables
def sum_of_squares(scores):
    """Return the sum of the squared values in *scores*."""
    return sum(score ** 2 for score in scores)


data = {'class': ['a', 'a', 'a', 'b', 'b', 'b', 'c', 'c', 'c'],
        'student_id': [1, 2, 3, 1, 2, 3, 1, 2, 3],
        'math_score': [67, 45, 76, 87, 45, 45, 65, 76, 98],
        'eng_score': [67, 46, 74, 46, 86, 67, 56, 34, 53]}
df = pd.DataFrame(data=data)
print(df)

# Only the numeric score columns can be aggregated; 'class' holds strings.
score_columns = ['eng_score', 'math_score']

# One row per class, one column per student_id; the remaining (score) columns
# are aggregated with pivot_table's default aggfunc.
print(df.pivot_table(columns='student_id', index='class'))
# Same layout restricted to the English score.
print(df.pivot_table(columns='student_id', index='class', values='eng_score'))
# pivot only reshapes (no aggregation): the existing row index is kept and
# every other column is spread across the student_id values.
print(df.pivot(columns='student_id'))
# Mean score per student_id across classes. The values are named explicitly so
# the string column 'class' is not passed to the mean aggregation.
mean_by_student = df.pivot_table(columns='student_id', values=score_columns, aggfunc='mean')
print(mean_by_student)
# Custom aggregation: sum of squared scores per student_id.
print(df.pivot_table(columns='student_id', values=score_columns, aggfunc=sum_of_squares))
반응형