Differences
This shows you the differences between two versions of the page.
Both sides previous revision Previous revision Next revision | Previous revision Next revision Both sides next revision | ||
software_development:python_pandas [2022/08/04 05:46] – [Pivot_table] prgram | software_development:python_pandas [2023/05/16 06:09] – [encoding_errors - 'ignore'] prgram | ||
---|---|---|---|
Line 1: | Line 1: | ||
====== python pandas ====== | ====== python pandas ====== | ||
{{INLINETOC}} | {{INLINETOC}} | ||
+ | |||
+ | === etc : list === | ||
+ | <code python> | ||
+ | set( [list] ) # unique value | ||
+ | [list].sort() # | ||
+ | [list1] + [list2] | ||
+ | </ | ||
===== shape of df ===== | ===== shape of df ===== | ||
Line 20: | Line 27: | ||
<code python> | <code python> | ||
df.groupby([컬럼들]).agg({' | df.groupby([컬럼들]).agg({' | ||
+ | |||
+ | df.groupby([COLUMNS])[' | ||
df = df.assign(date=pd.to_numeric(df[' | df = df.assign(date=pd.to_numeric(df[' | ||
Line 53: | Line 62: | ||
df.columns = [' | df.columns = [' | ||
</ | </ | ||
+ | |||
+ | === order of columns === | ||
+ | <code python> | ||
+ | #1 | ||
+ | df = df.sort_index(axis=' | ||
+ | #2 | ||
+ | df.columns | ||
+ | col_order = [' | ||
+ | df = df.reindex(col_order, | ||
+ | </ | ||
+ | |||
=== map === | === map === | ||
Line 87: | Line 107: | ||
iloc: Select by position | iloc: Select by position | ||
loc: Select by label | loc: Select by label | ||
+ | | ||
+ | df.loc[:, | ||
+ | |||
+ | df[~( df[' | ||
+ | df.loc[~( df[' | ||
</ | </ | ||
Line 98: | Line 123: | ||
| | ||
=====I/O file===== | =====I/O file===== | ||
+ | |||
+ | === encoding_errors - ' | ||
+ | Encoding 제대로 했는데도 안되면.. | ||
+ | 공공데이터가 이런 경우가 많음. | ||
+ | |||
+ | <code python> | ||
+ | import chardet | ||
+ | with open(file, ' | ||
+ | result = chardet.detect(rawdata.read(100000)) | ||
+ | result | ||
+ | |||
+ | |||
+ | data = pd.read_csv( file, encoding=' | ||
+ | # on_bad_lines=' | ||
+ | # error_bad_lines=False | ||
+ | </ | ||
=== to_numberic === | === to_numberic === |