Differences
This shows you the differences between two versions of the page.
Both sides previous revision Previous revision Next revision | Previous revision | ||
software_development:python_pandas [2022/08/04 15:03] prgram [group by] |
software_development:python_pandas [2023/05/16 15:17] (current) prgram [encoding_errors - 'ignore'] |
||
---|---|---|---|
Line 109: | Line 109: | ||
| | ||
df.loc[:,~df.columns.isin(['a','b'])] | df.loc[:,~df.columns.isin(['a','b'])] | ||
+ | |||
+ | df[~( df['a'].isin(['1','2','3']) & (df['b']=='3') )] #row-wise | ||
+ | df.loc[~( df['a'].isin(['1','2','3']) & (df['b']=='3') ), 8] #row-wise & column | ||
</code> | </code> | ||
Line 120: | Line 123: | ||
| | ||
=====I/O file===== | =====I/O file===== | ||
+ | |||
+ | === encoding_errors - 'ignore'=== | ||
+ | Encoding 제대로 했는데도 안되면.. | ||
+ | 공공데이터가 이런 경우가 많음. | ||
+ | |||
+ | Error tokenizing data. C error: EOF inside string starting at row 0 | 판다스 에러 | ||
+ | https://con2joa.tistory.com/m/60 | ||
+ | quoting=csv.QUOTE_NONE 파라미터 | ||
+ | |||
+ | <code python> | ||
+ | import chardet | ||
+ | with open(file, 'rb') as rawdata: | ||
+ | result = chardet.detect(rawdata.read(100000)) | ||
+ | result | ||
+ | |||
+ | |||
+ | data = pd.read_csv( file, encoding='cp949', encoding_errors='ignore') | ||
+ | # on_bad_lines='skip' | ||
+ | # error_bad_lines=False | ||
+ | </code> | ||
=== to_numeric === | === to_numeric === |