pandas

read_csv()

pandas.read_csv(file_path)

Читает данные из файла и возвращает pandas.DataFrame().

df = pandas.read_csv('data.csv', index_col=0, parse_dates=['date_'])
edu = pandas.read_csv(
    'some.csv',
    na_values=':',
    usecols=['TIME', 'GEO', 'Value'],
)

edu
"""
    TIME GEO    Value
  0   2000 sg11   sv21
  1   2000 sg12   sv22
  2   2000 sg13   sv23
  ...
  500 2000 sg1500 sv2500
"""

edu.head()
"""
  TIME GEO Value
  0 2000 sg11 sv21
  1 2000 sg12 sv22
  2 2000 sg13 sv23
"""

edu.tail()
"""
  498 2000 sg1498 sv2498
  499 2000 sg1499 sv2499
  500 2000 sg1500 sv2500
"""

edu.describe()
"""
      TIME        Value
count 384.000000  361.000000
mean  2005.500000 5.203989
std   3.456556    1.021694
min   2000.000000 2.880000
25%   2002.750000 4.620000
50%   2005.500000 5.060000
75%   2008.250000 5.660000
max   2011.000000 8.810000
"""

edu['Value']
"""
0 NaN
1 NaN
2 5.00
3 5.03
4 4.95
...
"""

edu[10:14]
"""
   TIME GEO   Value
  10 2000 sg110 sv210
  11 2000 sg111 sv211
  12 2000 sg112 sv212
  13 2000 sg113 sv213
"""
# Правая граница среза не включается, поэтому строки 14 в выводе нет.

# .ix удалён в pandas 1.0; .loc выбирает по меткам и включает обе границы
edu.loc[90:94, ['TIME', 'GEO']]
"""
   TIME GEO
  90 2000 sg190
  91 2000 sg191
  92 2000 sg192
  93 2000 sg193
  94 2000 sg194
"""

edu[edu['Value'] > 6.5].tail()
"""
    TIME GEO     Value
218 2002 Cyprus  6.60
281 2005 Malta   6.58
94  2010 Belgium 6.58
93  2009 Belgium 6.57
95  2011 Belgium 6.55
"""

edu[edu["Value"].isnull()].head()
"""
   TIME GEO                           Value
0  2000 European Union (28 countries) NaN
1  2001 European Union (28 countries) NaN
36 2000 Euro area (18 countries)      NaN
37 2001 Euro area (18 countries)      NaN
48 2000 Euro area (17 countries)      NaN
"""

edu.max(axis=0)
"""
TIME 2011
GEO Spain
Value 8.81
"""

edu['Value'].max()
# 8.81

"""
count() Number of non-null observations
sum() Sum of values
mean() Mean of values
median() Arithmetic median of values
min() Minimum
max() Maximum
prod() Product of values
std() Unbiased standard deviation
var() Unbiased variance
"""

s = edu['Value']/100
s.head()
"""
0 NaN
1 NaN
2 0.0500
3 0.0503
4 0.0495
"""

s = edu['Value'].apply(numpy.sqrt)
s.head()
"""
0 NaN
1 NaN
2 2.236068
3 2.242766
4 2.224860
"""

edu['ValueNorm'] = edu['Value']/edu['Value'].max()
edu.tail()
"""
    TIME GEO     Value ValueNorm
379 2007 Finland 5.90  0.669694
380 2008 Finland 6.10  0.692395
381 2009 Finland 6.81  0.772985
382 2010 Finland 6.85  0.777526
383 2011 Finland 6.76  0.767310
"""

DataFrame()

class pandas.DataFrame
data = {
    'year': [
        2010, 2011, 2012,
        2010, 2011, 2012,
        2010, 2011, 2012
    ],
    'team': [
        'FCBarcelona', 'FCBarcelona', 'FCBarcelona',
        'RMadrid', 'RMadrid', 'RMadrid',
        'ValenciaCF', 'ValenciaCF', 'ValenciaCF'
    ],
    'wins': [
        30, 28, 32,
        29, 32, 26,
        21, 17, 19
    ],
    'draws': [
        6, 7, 4,
        5, 4, 7,
        8, 10, 8
    ],
    'losses': [
        2, 3, 2,
        4, 2, 5,
        9, 11, 11
    ]
}

football = pandas.DataFrame(
    data,
    columns = [
        'year', 'team', 'wins', 'draws', 'losses'
    ]
)

# map() возвращает новый Series с преобразованными значениями указанного столбца
# (исходный столбец при этом не изменяется)
football.losses.map(lambda x: x+1)

# значения, которых нет в словаре, заменяются на NaN
football.losses.map({
    2: 'A',
    3: 'B',
})
copy()

Возвращает копию объекта

df_copy = df.copy()
drop()
df.drop(columns='val_updated')
head()

Возвращает первые n строк таблицы (по умолчанию 5)

edu.head()
"""
  TIME GEO Value
  0 2000 sg11 sv21
  1 2000 sg12 sv22
  2 2000 sg13 sv23
"""

edu.head(5)
"""
  TIME GEO Value
  0 2000 sg11 sv21
  ...
  4 2000 sg13 sv23
"""
set_index()
df.set_index('date_')

df.set_index(['Date', 'Store'])
df.set_index('UPC EAN', append=True, inplace=True)
sample()

Возвращает указанное количество случайных строк

df.sample(10)
"""
  TIME GEO Value
  0 2000 sg11 sv21
  ...
  9 2000 sg13 sv23
"""
sort_values()

Сортирует таблицу по указанному столбцу

df.sort_values('date_')