Differences

This shows you the differences between two versions of the page.

--- software_development:python_pandas [2022/08/04 15:00]
prgram [order of columns]
+++ software_development:python_pandas [2023/05/16 15:09]
prgram [encoding_errors - 'ignore']
@@ Line 27: / Line 27: @@
 <code python>
 df.groupby([컬럼들]).agg({'컬럼':sum}).reset_index()
+df.groupby([COLUMNS])['COLUMN'].max().reset_index()
 df = df.assign(date=pd.to_numeric(df['date'], errors='coerce')).groupby(['코드', '종목명']).agg({'date':np.min}).reset_index().drop_duplicates()
@@ Line 105: / Line 107: @@
 iloc: Select by position
 loc: Select by label
+df.loc[:,~df.columns.isin(['a','b'])]
+df[~( df['a'].isin(['1','2','3']) & (df['b']=='3') )]		#row-wise (wrap each comparison in parentheses: & binds tighter than ==)
+df.loc[~( df['a'].isin(['1','2','3']) & (df['b']=='3') ), 8]	#row-wise & column
 </code>
@@ Line 116: / Line 123: @@
 =====I/O file=====
+=== encoding_errors - 'ignore' ===
+인코딩을 올바르게 지정했는데도 파일을 읽지 못하면, encoding_errors='ignore'로 디코딩 오류를 무시하고 읽는다.
+공공데이터에서 이런 경우가 많음.
+<code python>
+import chardet
+with open(file, 'rb') as rawdata:
+    result = chardet.detect(rawdata.read(100000))
+result
+data = pd.read_csv( file, encoding='cp949', encoding_errors='ignore')
+# on_bad_lines='skip'     # pandas >= 1.3: skip malformed rows
+# error_bad_lines=False   # pandas < 1.3 equivalent (deprecated in 1.3, removed in 2.0)
+</code>
 === to_numeric ===