아래 문서를 공부함

Pandas import + 데이터 불러오기

import pandas as pd
airports = pd.read_csv('https://ourairports.com/data/airports.csv')
airport_freq = pd.read_csv('https://ourairports.com/data/airport-frequencies.csv')
runways = pd.read_csv('https://ourairports.com/data/runways.csv')
type(airports)
pandas.core.frame.DataFrame

Dataframe

airports.head(4)
ididenttypenamelatitude_deglongitude_degelevation_ftcontinentiso_countryiso_regionmunicipalityscheduled_servicegps_codeiata_codelocal_codehome_linkwikipedia_linkkeywords
0652300AheliportTotal Rf Heliport40.070801-74.93360111.0NaNUSUS-PABensalemno00ANaN00ANaNNaNNaN
132336100AAsmall_airportAero B Ranch Airport38.704022-101.4739113435.0NaNUSUS-KSLeotino00AANaN00AANaNNaNNaN
2652400AKsmall_airportLowell Field59.949200-151.695999450.0NaNUSUS-AKAnchor Pointno00AKNaN00AKNaNNaNNaN
3652500ALsmall_airportEpps Airpark34.864799-86.770302820.0NaNUSUS-ALHarvestno00ALNaN00ALNaNNaNNaN
airports.shape
(57376, 18)
airports.columns
Index(['id', 'ident', 'type', 'name', 'latitude_deg', 'longitude_deg',
       'elevation_ft', 'continent', 'iso_country', 'iso_region',
       'municipality', 'scheduled_service', 'gps_code', 'iata_code',
       'local_code', 'home_link', 'wikipedia_link', 'keywords'],
      dtype='object')
airports.describe()
idlatitude_deglongitude_degelevation_ft
count57376.00000057376.00000057376.00000049557.000000
mean93753.35446925.571000-34.6113891253.089210
std127917.35927727.28287079.9094071615.832743
min2.000000-90.000000-179.876999-1266.000000
25%14472.7500007.665290-92.046148208.000000
50%28960.50000036.222551-71.458750722.000000
75%45914.25000043.76455514.5370731517.000000
max335599.00000090.000000179.99989429977.000000

Pandas에서 직접 SQL 구문 실행하기

df1 = airport_freq[airport_freq.airport_ident == 'KLAX'] \
                  .sort_values('type', ascending=False)
df1
idairport_refairport_identtypedescriptionfrequency_mhz
11962607763632KLAXUNICUNICOM122.95
11961607753632KLAXTWRTWR119.80
11960607743632KLAXOPSAF37.22
11958607723632KLAXMISCCG34.50
11959607733632KLAXMISCCG898.40
11957607713632KLAXGNDGND121.65
11956607703632KLAXDEPSOCAL DEP124.30
11955607693632KLAXCLDCLNC DEL121.40
11954607683632KLAXATISATIS133.80
11952607673632KLAXAPPSOCAL APP36.07
11953607663632KLAXAPPSOCAL APP124.30
!pip install pandasql
Requirement already satisfied: pandasql in d:\bin\miniconda3\envs\blog\lib\site-packages (0.7.3)
Requirement already satisfied: pandas in d:\bin\miniconda3\envs\blog\lib\site-packages (from pandasql) (1.1.1)
Requirement already satisfied: sqlalchemy in d:\bin\miniconda3\envs\blog\lib\site-packages (from pandasql) (1.3.19)
Requirement already satisfied: numpy in d:\bin\miniconda3\envs\blog\lib\site-packages (from pandasql) (1.19.1)
Requirement already satisfied: python-dateutil>=2.7.3 in d:\bin\miniconda3\envs\blog\lib\site-packages (from pandas->pandasql) (2.8.1)
Requirement already satisfied: pytz>=2017.2 in d:\bin\miniconda3\envs\blog\lib\site-packages (from pandas->pandasql) (2020.1)
Requirement already satisfied: six>=1.5 in d:\bin\miniconda3\envs\blog\lib\site-packages (from python-dateutil>=2.7.3->pandas->pandasql) (1.15.0)
import pandasql as ps
sql = '''
  select * 
    from airport_freq 
   where airport_ident = 'KLAX' 
order by type desc;
'''
df = ps.sqldf(sql)
type(df)
pandas.core.frame.DataFrame
df
idairport_refairport_identtypedescriptionfrequency_mhz
0607763632KLAXUNICUNICOM122.95
1607753632KLAXTWRTWR119.80
2607743632KLAXOPSAF37.22
3607723632KLAXMISCCG34.50
4607733632KLAXMISCCG898.40
5607713632KLAXGNDGND121.65
6607703632KLAXDEPSOCAL DEP124.30
7607693632KLAXCLDCLNC DEL121.40
8607683632KLAXATISATIS133.80
9607673632KLAXAPPSOCAL APP36.07
10607663632KLAXAPPSOCAL APP124.30

SELECT, WHERE, DISTINCT, LIMIT

select * 
  from airports;
airports
ididenttypenamelatitude_deglongitude_degelevation_ftcontinentiso_countryiso_regionmunicipalityscheduled_servicegps_codeiata_codelocal_codehome_linkwikipedia_linkkeywords
0652300AheliportTotal Rf Heliport40.070801-74.93360111.0NaNUSUS-PABensalemno00ANaN00ANaNNaNNaN
132336100AAsmall_airportAero B Ranch Airport38.704022-101.4739113435.0NaNUSUS-KSLeotino00AANaN00AANaNNaNNaN
2652400AKsmall_airportLowell Field59.949200-151.695999450.0NaNUSUS-AKAnchor Pointno00AKNaN00AKNaNNaNNaN
3652500ALsmall_airportEpps Airpark34.864799-86.770302820.0NaNUSUS-ALHarvestno00ALNaN00ALNaNNaNNaN
4652600ARclosedNewport Hospital & Clinic Heliport35.608700-91.254898237.0NaNUSUS-ARNewportnoNaNNaNNaNNaNNaN00AR
.........................................................
57371317861ZYYKmedium_airportYingkou Lanqi Airport40.542524122.3586000.0ASCNCN-21YingkouyesZYYKYKHNaNNaNhttps://en.wikipedia.org/wiki/Yingkou_Lanqi_Ai...NaN
5737232753ZYYYmedium_airportShenyang Dongta Airport41.784401123.496002NaNASCNCN-21ShenyangnoZYYYNaNNaNNaNNaNNaN
5737346378ZZ-0001heliportSealand Helipad51.8944441.48250040.0EUGBGB-ENGSealandnoNaNNaNNaNhttp://www.sealandgov.org/https://en.wikipedia.org/wiki/Principality_of_...Roughs Tower Helipad
57374307326ZZ-0002small_airportGlorioso Islands Airstrip-11.58427847.29638911.0AFTFTF-U-AGrande GlorieusenoNaNNaNNaNNaNNaNNaN
57375313629ZZZZsmall_airportSatsuma Iōjima Airport30.784722130.270556338.0ASJPJP-46Mishima-MuranoRJX7NaNRJX7NaNhttp://wikimapia.org/6705190/Satsuma-Iwo-jima-...SATSUMA,IWOJIMA,RJX7

57376 rows × 18 columns

select municipality 
  from airports
sr = airports.municipality
print(type(sr))
<class 'pandas.core.series.Series'>
sr
0                Bensalem
1                   Leoti
2            Anchor Point
3                 Harvest
4                 Newport
               ...       
57371             Yingkou
57372            Shenyang
57373             Sealand
57374    Grande Glorieuse
57375        Mishima-Mura
Name: municipality, Length: 57376, dtype: object
select municipality 
  from airports
 limit 4;
airports.head(4).municipality
0        Bensalem
1           Leoti
2    Anchor Point
3         Harvest
Name: municipality, dtype: object
airports.municipality.head(4)
0        Bensalem
1           Leoti
2    Anchor Point
3         Harvest
Name: municipality, dtype: object
select ident, 
       name, 
       municipality 
  from airports;
df = airports[['ident', 'name', 'municipality']]
type(df)
pandas.core.frame.DataFrame
df
identnamemunicipality
000ATotal Rf HeliportBensalem
100AAAero B Ranch AirportLeoti
200AKLowell FieldAnchor Point
300ALEpps AirparkHarvest
400ARNewport Hospital & Clinic HeliportNewport
............
57371ZYYKYingkou Lanqi AirportYingkou
57372ZYYYShenyang Dongta AirportShenyang
57373ZZ-0001Sealand HelipadSealand
57374ZZ-0002Glorioso Islands AirstripGrande Glorieuse
57375ZZZZSatsuma Iōjima AirportMishima-Mura

57376 rows × 3 columns

select * 
  from airports 
 where municipality = 'Anchor Point';
airports[airports.municipality == 'Anchor Point']
ididenttypenamelatitude_deglongitude_degelevation_ftcontinentiso_countryiso_regionmunicipalityscheduled_servicegps_codeiata_codelocal_codehome_linkwikipedia_linkkeywords
2652400AKsmall_airportLowell Field59.9492-151.695999450.0NaNUSUS-AKAnchor Pointno00AKNaN00AKNaNNaNNaN
1052316097AK00small_airportAnchor River Airpark59.7967-151.863007120.0NaNUSUS-AKAnchor PointnoAK00NaNAK00NaNNaNNaN
airports[airports['municipality'] == 'Anchor Point']
ididenttypenamelatitude_deglongitude_degelevation_ftcontinentiso_countryiso_regionmunicipalityscheduled_servicegps_codeiata_codelocal_codehome_linkwikipedia_linkkeywords
2652400AKsmall_airportLowell Field59.9492-151.695999450.0NaNUSUS-AKAnchor Pointno00AKNaN00AKNaNNaNNaN
1052316097AK00small_airportAnchor River Airpark59.7967-151.863007120.0NaNUSUS-AKAnchor PointnoAK00NaNAK00NaNNaNNaN
airports[airports.index == 2]
ididenttypenamelatitude_deglongitude_degelevation_ftcontinentiso_countryiso_regionmunicipalityscheduled_servicegps_codeiata_codelocal_codehome_linkwikipedia_linkkeywords
2652400AKsmall_airportLowell Field59.9492-151.695999450.0NaNUSUS-AKAnchor Pointno00AKNaN00AKNaNNaNNaN
df = airports[airports.municipality == 'Anchor Point']
type(df)
pandas.core.frame.DataFrame
df
ididenttypenamelatitude_deglongitude_degelevation_ftcontinentiso_countryiso_regionmunicipalityscheduled_servicegps_codeiata_codelocal_codehome_linkwikipedia_linkkeywords
2652400AKsmall_airportLowell Field59.9492-151.695999450.0NaNUSUS-AKAnchor Pointno00AKNaN00AKNaNNaNNaN
1052316097AK00small_airportAnchor River Airpark59.7967-151.863007120.0NaNUSUS-AKAnchor PointnoAK00NaNAK00NaNNaNNaN
select ident, 
       name, 
       municipality 
  from airports 
 where municipality = 'Los Angeles'
airports[airports.municipality == 'Los Angeles'] \
             [['ident', 'name', 'municipality']]
identnamemunicipality
7101CNLos Angeles County Sheriff's Department HeliportLos Angeles
6430CA0Drew Medical Center HeliportLos Angeles
6450CA2VA Greater Los Angeles Health Care Center Heli...Los Angeles
6700CL7Good Samaritan Hospital HeliportLos Angeles
146514LDevonshire Area HeliportLos Angeles
............
52334US-1472Murdock Plaza HelipadLos Angeles
52356US-1494Center West HelipadLos Angeles
52358US-1496W Hollywood HelipadLos Angeles
52359US-14971600 Vine HelipadLos Angeles
52360US-1498Vine Street Tower HelipadLos Angeles

100 rows × 3 columns

airports[['ident', 'name', 'municipality']] \
            [airports.municipality == 'Los Angeles']
identnamemunicipality
7101CNLos Angeles County Sheriff's Department HeliportLos Angeles
6430CA0Drew Medical Center HeliportLos Angeles
6450CA2VA Greater Los Angeles Health Care Center Heli...Los Angeles
6700CL7Good Samaritan Hospital HeliportLos Angeles
146514LDevonshire Area HeliportLos Angeles
............
52334US-1472Murdock Plaza HelipadLos Angeles
52356US-1494Center West HelipadLos Angeles
52358US-1496W Hollywood HelipadLos Angeles
52359US-14971600 Vine HelipadLos Angeles
52360US-1498Vine Street Tower HelipadLos Angeles

100 rows × 3 columns

select distinct municipality 
  from airport;
airports.municipality.unique()
array(['Bensalem', 'Leoti', 'Anchor Point', ..., 'Sealand',
       'Grande Glorieuse', 'Mishima-Mura'], dtype=object)
airports[['ident', 'name', 'municipality']] #.unique()
identnamemunicipality
000ATotal Rf HeliportBensalem
100AAAero B Ranch AirportLeoti
200AKLowell FieldAnchor Point
300ALEpps AirparkHarvest
400ARNewport Hospital & Clinic HeliportNewport
............
57371ZYYKYingkou Lanqi AirportYingkou
57372ZYYYShenyang Dongta AirportShenyang
57373ZZ-0001Sealand HelipadSealand
57374ZZ-0002Glorioso Islands AirstripGrande Glorieuse
57375ZZZZSatsuma Iōjima AirportMishima-Mura

57376 rows × 3 columns

SELECT with multiple conditions

select * 
  from airports 
 where iso_region = 'US-CA' 
   and type = 'seaplane_base';
df = airports[(airports.iso_region == 'US-CA') 
              & (airports.type == 'seaplane_base')] \
             [['name', 'iso_region', 'type']]
print(type(df))
<class 'pandas.core.frame.DataFrame'>
df
nameiso_regiontype
984San Luis Reservoir Seaplane BaseUS-CAseaplane_base
2505Commodore Center Seaplane BaseUS-CAseaplane_base
6278Konocti - Clear Lake Seaplane BaseUS-CAseaplane_base
13587Folsom Lake Seaplane BaseUS-CAseaplane_base
15932Ferndale Resort Seaplane BaseUS-CAseaplane_base
18323Lake Berryessa Seaplane BaseUS-CAseaplane_base
23853Bridge Bay Resort Seaplane BaseUS-CAseaplane_base
31879Pebbly Beach Seaplane BaseUS-CAseaplane_base
38693Lake Oroville Landing Area Seaplane BaseUS-CAseaplane_base
42411Lost Isle Seaplane BaseUS-CAseaplane_base
49939Two Harbors Amphibious TerminalUS-CAseaplane_base
select ident, 
       name, 
       municipality 
  from airports 
 where iso_region = 'US-CA' 
   and type = 'large_airport';
airports[(airports.iso_region == 'US-CA') & (airports.type == 'large_airport')] \
             [['ident', 'name', 'municipality']]
identnamemunicipality
27512KBABBeale Air Force BaseMarysville
28350KEDWEdwards Air Force BaseEdwards
29141KLAXLos Angeles International AirportLos Angeles
29668KOAKMetropolitan Oakland International AirportOakland
29724KONTOntario International AirportOntario
31245KSANSan Diego International AirportSan Diego
31285KSFOSan Francisco International AirportSan Francisco
31306KSJCNorman Y. Mineta San Jose International AirportSan Jose
31328KSMFSacramento International AirportSacramento
31334KSNAJohn Wayne Airport-Orange County AirportSanta Ana
31383KSUUTravis Air Force BaseFairfield
31617KVBGVandenberg Air Force BaseLompoc
airports[['ident', 'name', 'municipality']] \
             [(airports.iso_region == 'US-CA') & (airports.type == 'large_airport')] 
identnamemunicipality
27512KBABBeale Air Force BaseMarysville
28350KEDWEdwards Air Force BaseEdwards
29141KLAXLos Angeles International AirportLos Angeles
29668KOAKMetropolitan Oakland International AirportOakland
29724KONTOntario International AirportOntario
31245KSANSan Diego International AirportSan Diego
31285KSFOSan Francisco International AirportSan Francisco
31306KSJCNorman Y. Mineta San Jose International AirportSan Jose
31328KSMFSacramento International AirportSacramento
31334KSNAJohn Wayne Airport-Orange County AirportSanta Ana
31383KSUUTravis Air Force BaseFairfield
31617KVBGVandenberg Air Force BaseLompoc

ORDER BY

  select * 
    from airport_freq 
   where airport_ident = 'KLAX' 
order by type;
airport_freq[airport_freq.airport_ident == 'KLAX'].sort_values('type')
idairport_refairport_identtypedescriptionfrequency_mhz
11952607673632KLAXAPPSOCAL APP36.07
11953607663632KLAXAPPSOCAL APP124.30
11954607683632KLAXATISATIS133.80
11955607693632KLAXCLDCLNC DEL121.40
11956607703632KLAXDEPSOCAL DEP124.30
11957607713632KLAXGNDGND121.65
11958607723632KLAXMISCCG34.50
11959607733632KLAXMISCCG898.40
11960607743632KLAXOPSAF37.22
11961607753632KLAXTWRTWR119.80
11962607763632KLAXUNICUNICOM122.95
  select * 
    from airport_freq 
   where airport_ident = 'KLAX' 
order by type desc;
airport_freq[airport_freq.airport_ident == 'KLAX'].sort_values('type', ascending=False)
idairport_refairport_identtypedescriptionfrequency_mhz
11962607763632KLAXUNICUNICOM122.95
11961607753632KLAXTWRTWR119.80
11960607743632KLAXOPSAF37.22
11958607723632KLAXMISCCG34.50
11959607733632KLAXMISCCG898.40
11957607713632KLAXGNDGND121.65
11956607703632KLAXDEPSOCAL DEP124.30
11955607693632KLAXCLDCLNC DEL121.40
11954607683632KLAXATISATIS133.80
11952607673632KLAXAPPSOCAL APP36.07
11953607663632KLAXAPPSOCAL APP124.30

IN… NOT IN

select * 
  from airports 
 where type in ('heliport', 'balloonport');
airports[airports.type.isin(['heliport', 'balloonport'])]
ididenttypenamelatitude_deglongitude_degelevation_ftcontinentiso_countryiso_regionmunicipalityscheduled_servicegps_codeiata_codelocal_codehome_linkwikipedia_linkkeywords
0652300AheliportTotal Rf Heliport40.070801-74.93360111.0NaNUSUS-PABensalemno00ANaN00ANaNNaNNaN
932265800CNheliportKitchen Creek Helibase Heliport32.727374-116.4597423350.0NaNUSUS-CAPine Valleyno00CNNaN00CNNaNNaNNaN
12653200FDheliportRinghaver Heliport28.846600-82.34539825.0NaNUSUS-FLRiverviewno00FDNaN00FDNaNNaNNaN
15653500GEheliportCaffrey Heliport33.884201-84.733902957.0NaNUSUS-GAHiramno00GENaN00GENaNNaNNaN
16653600HIheliportKaupulehu Heliport19.832715-155.98023343.0NaNUSUS-HIKailua-Konano00HINaN00HINaNNaNNaN
.........................................................
56988312611ZA-0117heliportPumba Helipad-33.38779926.410595NaNAFZAZA-ECNelson Mandela BaynoNaNNaNNaNhttp://www.pumbagamereserve.co.za/amenitiesNaNNaN
56990312624ZA-0119heliportCape Town Heliport-33.90101818.425936NaNAFZAZA-WCNaNnoNaNNaNNaNNaNNaNNaN
57011318475ZA-0140heliportKuruman Hospital Heliport-27.45990423.443762NaNAFZAZA-NCKurumannoNaNNaNNaNNaNNaNNaN
57108301278ZIZheliportZamzama Heliport26.71094467.667250128.0ASPKPK-SDZamzama Gas FieldnoNaNZIZNaNNaNNaNNaN
5737346378ZZ-0001heliportSealand Helipad51.8944441.48250040.0EUGBGB-ENGSealandnoNaNNaNNaNhttp://www.sealandgov.org/https://en.wikipedia.org/wiki/Principality_of_...Roughs Tower Helipad

12031 rows × 18 columns

select * 
  from airports 
 where type not in ('heliport', 'balloonport');
airports[~airports.type.isin(['heliport', 'balloonport'])]
ididenttypenamelatitude_deglongitude_degelevation_ftcontinentiso_countryiso_regionmunicipalityscheduled_servicegps_codeiata_codelocal_codehome_linkwikipedia_linkkeywords
132336100AAsmall_airportAero B Ranch Airport38.704022-101.4739113435.0NaNUSUS-KSLeotino00AANaN00AANaNNaNNaN
2652400AKsmall_airportLowell Field59.949200-151.695999450.0NaNUSUS-AKAnchor Pointno00AKNaN00AKNaNNaNNaN
3652500ALsmall_airportEpps Airpark34.864799-86.770302820.0NaNUSUS-ALHarvestno00ALNaN00ALNaNNaNNaN
4652600ARclosedNewport Hospital & Clinic Heliport35.608700-91.254898237.0NaNUSUS-ARNewportnoNaNNaNNaNNaNNaN00AR
532212700ASsmall_airportFulton Airport34.942803-97.8180191100.0NaNUSUS-OKAlexno00ASNaN00ASNaNNaNNaN
.........................................................
5737027244ZYYJmedium_airportYanji Chaoyangchuan Airport42.882801129.451004624.0ASCNCN-22YanjiyesZYYJYNJNaNNaNhttps://en.wikipedia.org/wiki/Yanji_Chaoyangch...NaN
57371317861ZYYKmedium_airportYingkou Lanqi Airport40.542524122.3586000.0ASCNCN-21YingkouyesZYYKYKHNaNNaNhttps://en.wikipedia.org/wiki/Yingkou_Lanqi_Ai...NaN
5737232753ZYYYmedium_airportShenyang Dongta Airport41.784401123.496002NaNASCNCN-21ShenyangnoZYYYNaNNaNNaNNaNNaN
57374307326ZZ-0002small_airportGlorioso Islands Airstrip-11.58427847.29638911.0AFTFTF-U-AGrande GlorieusenoNaNNaNNaNNaNNaNNaN
57375313629ZZZZsmall_airportSatsuma Iōjima Airport30.784722130.270556338.0ASJPJP-46Mishima-MuranoRJX7NaNRJX7NaNhttp://wikimapia.org/6705190/Satsuma-Iwo-jima-...SATSUMA,IWOJIMA,RJX7

45345 rows × 18 columns

GROUP BY, COUNT, SUM, ORDER BY

  select iso_country, 
         type, 
         count(*) 
    from airports 
group by iso_country, 
         type 
order by iso_country, 
         type;
airports.groupby(['iso_country', 'type']).size()
iso_country  type          
AD           heliport            2
AE           closed              1
             heliport           21
             large_airport       4
             medium_airport      7
                              ... 
ZM           small_airport      94
ZW           closed              2
             large_airport       1
             medium_airport      8
             small_airport     128
Length: 850, dtype: int64
  select iso_country, 
         type, 
         count(*) 
    from airports 
group by iso_country, 
         type 
order by iso_country, 
         count(*) desc;
airports.groupby(['iso_country', 'type']).size()  \
        .to_frame('count').reset_index()  \
        .sort_values(['iso_country', 'count'], ascending=[True, False])
iso_countrytypecount
0ADheliport2
2AEheliport21
6AEsmall_airport16
4AEmedium_airport7
3AElarge_airport4
............
843ZMlarge_airport1
849ZWsmall_airport128
848ZWmedium_airport8
846ZWclosed2
847ZWlarge_airport1

850 rows × 3 columns

이건 좀 더 봐야겠다

airports.groupby(['iso_country', 'type']).sum()
idlatitude_deglongitude_degelevation_ft
iso_countrytype
ADheliport36007685.0574313.0527113450.0
AEclosed4225623.29810054.2216990.0
heliport6389256519.4819861140.423719706.0
large_airport31602099.910756220.694089375.0
medium_airport347052172.974312384.8242771406.0
..................
ZMsmall_airport11283607-1294.7180142640.091073255681.0
ZWclosed654955-34.87092154.8956656779.0
large_airport3005-17.93180131.0928004887.0
medium_airport24058-151.194701234.84700426744.0
small_airport19701085-2384.1262373832.335854273732.0

850 rows × 4 columns

HAVING

  select type, 
         count(*) 
    from airports 
   where iso_country = 'US' 
group by type 
  having count(*) > 1000 
order by count(*) desc;
s = airports[airports.iso_country == 'US'] \
             .groupby('type').filter(lambda g: len(g) > 1000) \
             .groupby('type').size() \
             .sort_values(ascending=False)
type(s)
pandas.core.series.Series
s
type
small_airport    13565
heliport          6316
closed            1930
dtype: int64
s.heliport
6316

Top N records

  select iso_country 
    from by_country 
order by size desc 
   limit 10;
by_country = airports.groupby(['iso_country']).size()\
                     .to_frame('airport_count').reset_index()
df = by_country.nlargest(10, columns='airport_count')
df
iso_countryairport_count
224US23260
29BR4987
35CA2796
12AU2019
152MX1405
117KR1374
74GB1203
184RU1121
54DE957
72FR894
  select iso_country 
    from by_country 
   order by size desc 
   limit 10 
  offset 10;
by_country.nlargest(10, columns='airport_count').tail(5)
iso_countryairport_count
117KR1374
74GB1203
184RU1121
54DE957
72FR894

Aggregate functions (MIN, MAX, MEAN)

  select max(length_ft), 
         min(length_ft), 
         avg(length_ft), 
         median(length_ft)   -- 사용자정의 함수로 만들었다 치고
    from runways;
runways.head(3)
idairport_refairport_identlength_ftwidth_ftsurfacelightedclosedle_identle_latitude_degle_longitude_degle_elevation_ftle_heading_degTle_displaced_threshold_fthe_identhe_latitude_deghe_longitude_deghe_elevation_fthe_heading_degThe_displaced_threshold_ft
0269408652300A80.080.0ASPH-G10H1NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
1255155652400AK2500.070.0GRVL00NNaNNaNNaNNaNNaNSNaNNaNNaNNaNNaN
2254165652500AL2300.0200.0TURF0001NaNNaNNaNNaNNaN19NaNNaNNaNNaNNaN
runways.describe()
idairport_reflength_ftwidth_ftlightedclosedle_latitude_degle_longitude_degle_elevation_ftle_heading_degTle_displaced_threshold_fthe_latitude_deghe_longitude_deghe_elevation_fthe_heading_degThe_displaced_threshold_ft
count41639.00000041639.00000041443.00000038981.00000041639.00000041639.00000014449.00000014444.00000012235.00000014228.0000002819.00000014434.00000014436.00000012118.00000016043.0000003129.000000
mean257324.08218334094.5951633262.173684110.5201250.2607410.01556231.197196-40.3129251060.108868104.374123534.01348031.194157-40.2567151054.425400261.749082536.128476
std21373.78112468108.3105312735.615483231.5103620.4390440.12377623.02183179.8587311465.76577457.292638471.86707723.01901479.8442991458.61905678.508982466.220434
min232758.0000002.0000000.0000000.0000000.0000000.000000-75.597200-178.303000-1246.0000000.0000001.000000-75.595400-178.292000-1210.0000000.0000001.000000
25%243178.5000008543.5000001700.00000059.0000000.0000000.00000028.796800-96.667350117.00000057.000000229.50000028.788800-96.642750115.000000226.000000228.000000
50%253593.00000018692.0000002743.00000075.0000000.0000000.00000037.591200-80.403000571.00000098.700000400.00000037.579950-80.368750565.500000270.000000400.000000
75%264015.50000027430.5000004241.000000100.0000001.0000000.00000044.27740015.3221251247.000000148.000000685.00000044.27227515.3665751237.000000316.000000699.000000
max335591.000000335590.000000120000.0000009000.0000001.0000001.00000082.512800179.33700013202.000000360.0000006365.00000082.522800179.34300013314.000000363.0000005001.000000
# df = runways.agg({'length_ft': ['min', 'max', 'mean', 'median', 'count', 'std']})
df = runways.agg({'airport_ref': ['min', 'max', 'mean', 'median', 'count', 'std']})
df
airport_ref
min2.000000
max335590.000000
mean34094.595163
median18692.000000
count41639.000000
std68108.310531
df.T
minmaxmeanmediancountstd
airport_ref2.0335590.034094.59516318692.041639.068108.310531

JOIN

  select airport_ident, 
         a.type, 
         a.description, 
         frequency_mhz 
    from airport_freq as a join airports as b
      on airport_freq.airport_ref = airports.id 
   where airports.ident = 'KLAX'
airport_freq.merge(
        airports[airports.ident == 'KLAX'][['id']], 
        left_on='airport_ref', 
        right_on='id', 
        how='inner'
     )[['airport_ident', 'type', 'description', 'frequency_mhz']]
airport_identtypedescriptionfrequency_mhz
0KLAXAPPSOCAL APP36.07
1KLAXAPPSOCAL APP124.30
2KLAXATISATIS133.80
3KLAXCLDCLNC DEL121.40
4KLAXDEPSOCAL DEP124.30
5KLAXGNDGND121.65
6KLAXMISCCG34.50
7KLAXMISCCG898.40
8KLAXOPSAF37.22
9KLAXTWRTWR119.80
10KLAXUNICUNICOM122.95
pd.merge(
        airport_freq,
        airports[airports.ident == 'KLAX'][['id']], 
        left_on='airport_ref', 
        right_on='id', 
        how='inner'
     )[['airport_ident', 'type', 'description', 'frequency_mhz']]
airport_identtypedescriptionfrequency_mhz
0KLAXAPPSOCAL APP36.07
1KLAXAPPSOCAL APP124.30
2KLAXATISATIS133.80
3KLAXCLDCLNC DEL121.40
4KLAXDEPSOCAL DEP124.30
5KLAXGNDGND121.65
6KLAXMISCCG34.50
7KLAXMISCCG898.40
8KLAXOPSAF37.22
9KLAXTWRTWR119.80
10KLAXUNICUNICOM122.95

UNION ALL and UNION

  select name, 
         municipality 
    from airports 
   where ident = 'KLAX' 
union all 
  select name, 
         municipality 
    from airports 
   where ident = 'KLGB';
airports[airports.ident == 'KLAX'][['name', 'municipality']]
namemunicipality
29141Los Angeles International AirportLos Angeles
airports[airports.ident == 'KLGB'][['name', 'municipality']]
namemunicipality
29166Long Beach /Daugherty Field/ AirportLong Beach
pd.concat([
        airports[airports.ident == 'KLAX'][['name', 'municipality']], 
        airports[airports.ident == 'KLGB'][['name', 'municipality']]
    ])
namemunicipality
29141Los Angeles International AirportLos Angeles
29166Long Beach /Daugherty Field/ AirportLong Beach

INSERT

create table heroes (id integer, name text);	
insert into heroes values (1, 'Harry Potter');	
insert into heroes values (2, 'Ron Weasley');	
insert into heroes values (3, 'Hermione Granger');	
df1 = pd.DataFrame({'id': [1, 2], 'name': ['Harry Potter', 'Ron Weasley']})
df1
idname
01Harry Potter
12Ron Weasley
df2 = pd.DataFrame({'id': [3], 'name': ['Hermione Granger']})
df2
idname
03Hermione Granger
pd.concat([df1, df2]).reset_index(drop=True)
idname
01Harry Potter
12Ron Weasley
23Hermione Granger

UPDATE

update airports 
   set home_link = 'http://www.lawa.org/welcomelax.aspx' 
 where ident == 'KLAX';
airports[airports['ident'] == 'KLAX'][['home_link']]
# airports[airports.ident == 'KLAX'][['home_link']]
home_link
29141https://www.flylax.com/
airports.loc[airports['ident'] == 'KLAX', 'home_link']
29141    https://www.flylax.com/
Name: home_link, dtype: object
airports.loc[airports['ident'] == 'KLAX', 'home_link'] = 'http://www.lawa.org/welcomelax.aspx'
airports.loc[airports['ident'] == 'KLAX', 'home_link']
29141    http://www.lawa.org/welcomelax.aspx
Name: home_link, dtype: object
airports.loc[airports.type == 'heliport', 'home_link'] = 'http://haha'
airports[airports.type == 'heliport'][['ident', 'name', 'home_link']]
identnamehome_link
000ATotal Rf Heliporthttp://haha
900CNKitchen Creek Helibase Heliporthttp://haha
1200FDRinghaver Heliporthttp://haha
1500GECaffrey Heliporthttp://haha
1600HIKaupulehu Heliporthttp://haha
............
56988ZA-0117Pumba Helipadhttp://haha
56990ZA-0119Cape Town Heliporthttp://haha
57011ZA-0140Kuruman Hospital Heliporthttp://haha
57108ZIZZamzama Heliporthttp://haha
57373ZZ-0001Sealand Helipadhttp://haha

12007 rows × 3 columns

Immutability

airports.home_link = 'http//hoho' # work
airports.home_link
0        http//hoho
1        http//hoho
2        http//hoho
3        http//hoho
4        http//hoho
            ...    
57371    http//hoho
57372    http//hoho
57373    http//hoho
57374    http//hoho
57375    http//hoho
Name: home_link, Length: 57376, dtype: object
airports[airports['ident'] == 'KLAX'].home_link = 'http//haha' # not work
airports[airports['ident'] == 'KLAX'].home_link
d:\bin\miniconda3\envs\blog\lib\site-packages\pandas\core\generic.py:5159: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[name] = value

29141    http//hoho
Name: home_link, dtype: object

DELETE

delete from airport_freq 
      where type = 'MISC';
# 다시 깨끗하게 로딩하자
airport_freq = pd.read_csv('https://ourairports.com/data/airport-frequencies.csv')
airport_freq[airport_freq.type != 'MISC']
idairport_refairport_identtypedescriptionfrequency_mhz
070518652800CACTAFCTAF122.90
1307581658901FLARCALNaN122.90
275239658901FLCTAFCEDAR KNOLL TRAFFIC122.80
360191675604CACTAFCTAF122.90
459287677904MSUNICUNICOM122.80
.....................
289035124727242ZYTLATISATIS126.65
289045124827242ZYTLTWRDALIAN TWR118.25
289055124327243ZYTXATISATIS127.45
289065124427243ZYTXTWRSHENYANG TWR118.10
289075119027244ZYYJTWRTWR130.00

26683 rows × 6 columns

airport_freq.drop(airport_freq[airport_freq.type == 'MISC'].index)
idairport_refairport_identtypedescriptionfrequency_mhz
070518652800CACTAFCTAF122.90
1307581658901FLARCALNaN122.90
275239658901FLCTAFCEDAR KNOLL TRAFFIC122.80
360191675604CACTAFCTAF122.90
459287677904MSUNICUNICOM122.80
.....................
289035124727242ZYTLATISATIS126.65
289045124827242ZYTLTWRDALIAN TWR118.25
289055124327243ZYTXATISATIS127.45
289065124427243ZYTXTWRSHENYANG TWR118.10
289075119027244ZYYJTWRTWR130.00

26683 rows × 6 columns