아래 문서를 공부함
Pandas import + 데이터 불러오기
import pandas as pd
airports = pd.read_csv( 'https://ourairports.com/data/airports.csv' )
airport_freq = pd.read_csv( 'https://ourairports.com/data/airport-frequencies.csv' )
runways = pd.read_csv( 'https://ourairports.com/data/runways.csv' )
type (airports)
pandas.core.frame.DataFrame
Dataframe
airports.head( 4 )
id ident type name latitude_deg longitude_deg elevation_ft continent iso_country iso_region municipality scheduled_service gps_code iata_code local_code home_link wikipedia_link keywords 0 6523 00A heliport Total Rf Heliport 40.070801 -74.933601 11.0 NaN US US-PA Bensalem no 00A NaN 00A NaN NaN NaN 1 323361 00AA small_airport Aero B Ranch Airport 38.704022 -101.473911 3435.0 NaN US US-KS Leoti no 00AA NaN 00AA NaN NaN NaN 2 6524 00AK small_airport Lowell Field 59.949200 -151.695999 450.0 NaN US US-AK Anchor Point no 00AK NaN 00AK NaN NaN NaN 3 6525 00AL small_airport Epps Airpark 34.864799 -86.770302 820.0 NaN US US-AL Harvest no 00AL NaN 00AL NaN NaN NaN
airports.shape
(57376, 18)
airports.columns
Index(['id', 'ident', 'type', 'name', 'latitude_deg', 'longitude_deg',
'elevation_ft', 'continent', 'iso_country', 'iso_region',
'municipality', 'scheduled_service', 'gps_code', 'iata_code',
'local_code', 'home_link', 'wikipedia_link', 'keywords'],
dtype='object')
airports.describe()
id latitude_deg longitude_deg elevation_ft count 57376.000000 57376.000000 57376.000000 49557.000000 mean 93753.354469 25.571000 -34.611389 1253.089210 std 127917.359277 27.282870 79.909407 1615.832743 min 2.000000 -90.000000 -179.876999 -1266.000000 25% 14472.750000 7.665290 -92.046148 208.000000 50% 28960.500000 36.222551 -71.458750 722.000000 75% 45914.250000 43.764555 14.537073 1517.000000 max 335599.000000 90.000000 179.999894 29977.000000
Pandas에서 직접 SQL 구문 실행하기
df1 = airport_freq[airport_freq.airport_ident == 'KLAX' ] \
.sort_values( 'type' , ascending = False )
df1
id airport_ref airport_ident type description frequency_mhz 11962 60776 3632 KLAX UNIC UNICOM 122.95 11961 60775 3632 KLAX TWR TWR 119.80 11960 60774 3632 KLAX OPS AF 37.22 11958 60772 3632 KLAX MISC CG 34.50 11959 60773 3632 KLAX MISC CG 898.40 11957 60771 3632 KLAX GND GND 121.65 11956 60770 3632 KLAX DEP SOCAL DEP 124.30 11955 60769 3632 KLAX CLD CLNC DEL 121.40 11954 60768 3632 KLAX ATIS ATIS 133.80 11952 60767 3632 KLAX APP SOCAL APP 36.07 11953 60766 3632 KLAX APP SOCAL APP 124.30
! pip install pandasql
Requirement already satisfied: pandasql in d:\bin\miniconda3\envs\blog\lib\site-packages (0.7.3)
Requirement already satisfied: pandas in d:\bin\miniconda3\envs\blog\lib\site-packages (from pandasql) (1.1.1)
Requirement already satisfied: sqlalchemy in d:\bin\miniconda3\envs\blog\lib\site-packages (from pandasql) (1.3.19)
Requirement already satisfied: numpy in d:\bin\miniconda3\envs\blog\lib\site-packages (from pandasql) (1.19.1)
Requirement already satisfied: python-dateutil>=2.7.3 in d:\bin\miniconda3\envs\blog\lib\site-packages (from pandas->pandasql) (2.8.1)
Requirement already satisfied: pytz>=2017.2 in d:\bin\miniconda3\envs\blog\lib\site-packages (from pandas->pandasql) (2020.1)
Requirement already satisfied: six>=1.5 in d:\bin\miniconda3\envs\blog\lib\site-packages (from python-dateutil>=2.7.3->pandas->pandasql) (1.15.0)
import pandasql as ps
sql = '''
select *
from airport_freq
where airport_ident = 'KLAX'
order by type desc;
'''
df = ps.sqldf(sql)
type (df)
pandas.core.frame.DataFrame
df
id airport_ref airport_ident type description frequency_mhz 0 60776 3632 KLAX UNIC UNICOM 122.95 1 60775 3632 KLAX TWR TWR 119.80 2 60774 3632 KLAX OPS AF 37.22 3 60772 3632 KLAX MISC CG 34.50 4 60773 3632 KLAX MISC CG 898.40 5 60771 3632 KLAX GND GND 121.65 6 60770 3632 KLAX DEP SOCAL DEP 124.30 7 60769 3632 KLAX CLD CLNC DEL 121.40 8 60768 3632 KLAX ATIS ATIS 133.80 9 60767 3632 KLAX APP SOCAL APP 36.07 10 60766 3632 KLAX APP SOCAL APP 124.30
SELECT, WHERE, DISTINCT, LIMIT
select *
from airports;
airports
id ident type name latitude_deg longitude_deg elevation_ft continent iso_country iso_region municipality scheduled_service gps_code iata_code local_code home_link wikipedia_link keywords 0 6523 00A heliport Total Rf Heliport 40.070801 -74.933601 11.0 NaN US US-PA Bensalem no 00A NaN 00A NaN NaN NaN 1 323361 00AA small_airport Aero B Ranch Airport 38.704022 -101.473911 3435.0 NaN US US-KS Leoti no 00AA NaN 00AA NaN NaN NaN 2 6524 00AK small_airport Lowell Field 59.949200 -151.695999 450.0 NaN US US-AK Anchor Point no 00AK NaN 00AK NaN NaN NaN 3 6525 00AL small_airport Epps Airpark 34.864799 -86.770302 820.0 NaN US US-AL Harvest no 00AL NaN 00AL NaN NaN NaN 4 6526 00AR closed Newport Hospital & Clinic Heliport 35.608700 -91.254898 237.0 NaN US US-AR Newport no NaN NaN NaN NaN NaN 00AR ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... 57371 317861 ZYYK medium_airport Yingkou Lanqi Airport 40.542524 122.358600 0.0 AS CN CN-21 Yingkou yes ZYYK YKH NaN NaN https://en.wikipedia.org/wiki/Yingkou_Lanqi_Ai... NaN 57372 32753 ZYYY medium_airport Shenyang Dongta Airport 41.784401 123.496002 NaN AS CN CN-21 Shenyang no ZYYY NaN NaN NaN NaN NaN 57373 46378 ZZ-0001 heliport Sealand Helipad 51.894444 1.482500 40.0 EU GB GB-ENG Sealand no NaN NaN NaN http://www.sealandgov.org/ https://en.wikipedia.org/wiki/Principality_of_... Roughs Tower Helipad 57374 307326 ZZ-0002 small_airport Glorioso Islands Airstrip -11.584278 47.296389 11.0 AF TF TF-U-A Grande Glorieuse no NaN NaN NaN NaN NaN NaN 57375 313629 ZZZZ small_airport Satsuma Iōjima Airport 30.784722 130.270556 338.0 AS JP JP-46 Mishima-Mura no RJX7 NaN RJX7 NaN http://wikimapia.org/6705190/Satsuma-Iwo-jima-... SATSUMA,IWOJIMA,RJX7
57376 rows × 18 columns
select municipality
from airports
sr = airports.municipality
print ( type (sr))
<class 'pandas.core.series.Series'>
sr
0 Bensalem
1 Leoti
2 Anchor Point
3 Harvest
4 Newport
...
57371 Yingkou
57372 Shenyang
57373 Sealand
57374 Grande Glorieuse
57375 Mishima-Mura
Name: municipality, Length: 57376, dtype: object
select municipality
from airports
limit 4 ;
airports.head( 4 ).municipality
0 Bensalem
1 Leoti
2 Anchor Point
3 Harvest
Name: municipality, dtype: object
airports.municipality.head( 4 )
0 Bensalem
1 Leoti
2 Anchor Point
3 Harvest
Name: municipality, dtype: object
select ident,
name ,
municipality
from airports;
df = airports[[ 'ident' , 'name' , 'municipality' ]]
type (df)
pandas.core.frame.DataFrame
df
ident name municipality 0 00A Total Rf Heliport Bensalem 1 00AA Aero B Ranch Airport Leoti 2 00AK Lowell Field Anchor Point 3 00AL Epps Airpark Harvest 4 00AR Newport Hospital & Clinic Heliport Newport ... ... ... ... 57371 ZYYK Yingkou Lanqi Airport Yingkou 57372 ZYYY Shenyang Dongta Airport Shenyang 57373 ZZ-0001 Sealand Helipad Sealand 57374 ZZ-0002 Glorioso Islands Airstrip Grande Glorieuse 57375 ZZZZ Satsuma Iōjima Airport Mishima-Mura
57376 rows × 3 columns
select *
from airports
where municipality = 'Anchor Point' ;
airports[airports.municipality == 'Anchor Point' ]
id ident type name latitude_deg longitude_deg elevation_ft continent iso_country iso_region municipality scheduled_service gps_code iata_code local_code home_link wikipedia_link keywords 2 6524 00AK small_airport Lowell Field 59.9492 -151.695999 450.0 NaN US US-AK Anchor Point no 00AK NaN 00AK NaN NaN NaN 10523 16097 AK00 small_airport Anchor River Airpark 59.7967 -151.863007 120.0 NaN US US-AK Anchor Point no AK00 NaN AK00 NaN NaN NaN
airports[airports[ 'municipality' ] == 'Anchor Point' ]
id ident type name latitude_deg longitude_deg elevation_ft continent iso_country iso_region municipality scheduled_service gps_code iata_code local_code home_link wikipedia_link keywords 2 6524 00AK small_airport Lowell Field 59.9492 -151.695999 450.0 NaN US US-AK Anchor Point no 00AK NaN 00AK NaN NaN NaN 10523 16097 AK00 small_airport Anchor River Airpark 59.7967 -151.863007 120.0 NaN US US-AK Anchor Point no AK00 NaN AK00 NaN NaN NaN
airports[airports.index == 2 ]
id ident type name latitude_deg longitude_deg elevation_ft continent iso_country iso_region municipality scheduled_service gps_code iata_code local_code home_link wikipedia_link keywords 2 6524 00AK small_airport Lowell Field 59.9492 -151.695999 450.0 NaN US US-AK Anchor Point no 00AK NaN 00AK NaN NaN NaN
df = airports[airports.municipality == 'Anchor Point' ]
type (df)
pandas.core.frame.DataFrame
df
id ident type name latitude_deg longitude_deg elevation_ft continent iso_country iso_region municipality scheduled_service gps_code iata_code local_code home_link wikipedia_link keywords 2 6524 00AK small_airport Lowell Field 59.9492 -151.695999 450.0 NaN US US-AK Anchor Point no 00AK NaN 00AK NaN NaN NaN 10523 16097 AK00 small_airport Anchor River Airpark 59.7967 -151.863007 120.0 NaN US US-AK Anchor Point no AK00 NaN AK00 NaN NaN NaN
select ident,
name ,
municipality
from airports
where municipality = 'Los Angeles'
airports[airports.municipality == 'Los Angeles' ] \
[[ 'ident' , 'name' , 'municipality' ]]
ident name municipality 71 01CN Los Angeles County Sheriff's Department Heliport Los Angeles 643 0CA0 Drew Medical Center Heliport Los Angeles 645 0CA2 VA Greater Los Angeles Health Care Center Heli... Los Angeles 670 0CL7 Good Samaritan Hospital Heliport Los Angeles 1465 14L Devonshire Area Heliport Los Angeles ... ... ... ... 52334 US-1472 Murdock Plaza Helipad Los Angeles 52356 US-1494 Center West Helipad Los Angeles 52358 US-1496 W Hollywood Helipad Los Angeles 52359 US-1497 1600 Vine Helipad Los Angeles 52360 US-1498 Vine Street Tower Helipad Los Angeles
100 rows × 3 columns
airports[[ 'ident' , 'name' , 'municipality' ]] \
[airports.municipality == 'Los Angeles' ]
ident name municipality 71 01CN Los Angeles County Sheriff's Department Heliport Los Angeles 643 0CA0 Drew Medical Center Heliport Los Angeles 645 0CA2 VA Greater Los Angeles Health Care Center Heli... Los Angeles 670 0CL7 Good Samaritan Hospital Heliport Los Angeles 1465 14L Devonshire Area Heliport Los Angeles ... ... ... ... 52334 US-1472 Murdock Plaza Helipad Los Angeles 52356 US-1494 Center West Helipad Los Angeles 52358 US-1496 W Hollywood Helipad Los Angeles 52359 US-1497 1600 Vine Helipad Los Angeles 52360 US-1498 Vine Street Tower Helipad Los Angeles
100 rows × 3 columns
select distinct municipality
from airport;
airports.municipality.unique()
array(['Bensalem', 'Leoti', 'Anchor Point', ..., 'Sealand',
'Grande Glorieuse', 'Mishima-Mura'], dtype=object)
airports[[ 'ident' , 'name' , 'municipality' ]] #.unique()
ident name municipality 0 00A Total Rf Heliport Bensalem 1 00AA Aero B Ranch Airport Leoti 2 00AK Lowell Field Anchor Point 3 00AL Epps Airpark Harvest 4 00AR Newport Hospital & Clinic Heliport Newport ... ... ... ... 57371 ZYYK Yingkou Lanqi Airport Yingkou 57372 ZYYY Shenyang Dongta Airport Shenyang 57373 ZZ-0001 Sealand Helipad Sealand 57374 ZZ-0002 Glorioso Islands Airstrip Grande Glorieuse 57375 ZZZZ Satsuma Iōjima Airport Mishima-Mura
57376 rows × 3 columns
SELECT with multiple conditions
select *
from airports
where iso_region = 'US-CA'
and type = 'seaplane_base' ;
df = airports[(airports.iso_region == 'US-CA' )
& (airports.type == 'seaplane_base' )] \
[[ 'name' , 'iso_region' , 'type' ]]
print ( type (df))
<class 'pandas.core.frame.DataFrame'>
df
name iso_region type 984 San Luis Reservoir Seaplane Base US-CA seaplane_base 2505 Commodore Center Seaplane Base US-CA seaplane_base 6278 Konocti - Clear Lake Seaplane Base US-CA seaplane_base 13587 Folsom Lake Seaplane Base US-CA seaplane_base 15932 Ferndale Resort Seaplane Base US-CA seaplane_base 18323 Lake Berryessa Seaplane Base US-CA seaplane_base 23853 Bridge Bay Resort Seaplane Base US-CA seaplane_base 31879 Pebbly Beach Seaplane Base US-CA seaplane_base 38693 Lake Oroville Landing Area Seaplane Base US-CA seaplane_base 42411 Lost Isle Seaplane Base US-CA seaplane_base 49939 Two Harbors Amphibious Terminal US-CA seaplane_base
select ident,
name ,
municipality
from airports
where iso_region = 'US-CA'
and type = 'large_airport' ;
airports[(airports.iso_region == 'US-CA' ) & (airports.type == 'large_airport' )] \
[[ 'ident' , 'name' , 'municipality' ]]
ident name municipality 27512 KBAB Beale Air Force Base Marysville 28350 KEDW Edwards Air Force Base Edwards 29141 KLAX Los Angeles International Airport Los Angeles 29668 KOAK Metropolitan Oakland International Airport Oakland 29724 KONT Ontario International Airport Ontario 31245 KSAN San Diego International Airport San Diego 31285 KSFO San Francisco International Airport San Francisco 31306 KSJC Norman Y. Mineta San Jose International Airport San Jose 31328 KSMF Sacramento International Airport Sacramento 31334 KSNA John Wayne Airport-Orange County Airport Santa Ana 31383 KSUU Travis Air Force Base Fairfield 31617 KVBG Vandenberg Air Force Base Lompoc
airports[[ 'ident' , 'name' , 'municipality' ]] \
[(airports.iso_region == 'US-CA' ) & (airports.type == 'large_airport' )]
ident name municipality 27512 KBAB Beale Air Force Base Marysville 28350 KEDW Edwards Air Force Base Edwards 29141 KLAX Los Angeles International Airport Los Angeles 29668 KOAK Metropolitan Oakland International Airport Oakland 29724 KONT Ontario International Airport Ontario 31245 KSAN San Diego International Airport San Diego 31285 KSFO San Francisco International Airport San Francisco 31306 KSJC Norman Y. Mineta San Jose International Airport San Jose 31328 KSMF Sacramento International Airport Sacramento 31334 KSNA John Wayne Airport-Orange County Airport Santa Ana 31383 KSUU Travis Air Force Base Fairfield 31617 KVBG Vandenberg Air Force Base Lompoc
ORDER BY
select *
from airport_freq
where airport_ident = 'KLAX'
order by type ;
airport_freq[airport_freq.airport_ident == 'KLAX' ].sort_values( 'type' )
id airport_ref airport_ident type description frequency_mhz 11952 60767 3632 KLAX APP SOCAL APP 36.07 11953 60766 3632 KLAX APP SOCAL APP 124.30 11954 60768 3632 KLAX ATIS ATIS 133.80 11955 60769 3632 KLAX CLD CLNC DEL 121.40 11956 60770 3632 KLAX DEP SOCAL DEP 124.30 11957 60771 3632 KLAX GND GND 121.65 11958 60772 3632 KLAX MISC CG 34.50 11959 60773 3632 KLAX MISC CG 898.40 11960 60774 3632 KLAX OPS AF 37.22 11961 60775 3632 KLAX TWR TWR 119.80 11962 60776 3632 KLAX UNIC UNICOM 122.95
select *
from airport_freq
where airport_ident = 'KLAX'
order by type desc ;
airport_freq[airport_freq.airport_ident == 'KLAX' ].sort_values( 'type' , ascending = False )
id airport_ref airport_ident type description frequency_mhz 11962 60776 3632 KLAX UNIC UNICOM 122.95 11961 60775 3632 KLAX TWR TWR 119.80 11960 60774 3632 KLAX OPS AF 37.22 11958 60772 3632 KLAX MISC CG 34.50 11959 60773 3632 KLAX MISC CG 898.40 11957 60771 3632 KLAX GND GND 121.65 11956 60770 3632 KLAX DEP SOCAL DEP 124.30 11955 60769 3632 KLAX CLD CLNC DEL 121.40 11954 60768 3632 KLAX ATIS ATIS 133.80 11952 60767 3632 KLAX APP SOCAL APP 36.07 11953 60766 3632 KLAX APP SOCAL APP 124.30
IN… NOT IN
select *
from airports
where type in ( 'heliport' , 'balloonport' );
airports[airports.type.isin([ 'heliport' , 'balloonport' ])]
id ident type name latitude_deg longitude_deg elevation_ft continent iso_country iso_region municipality scheduled_service gps_code iata_code local_code home_link wikipedia_link keywords 0 6523 00A heliport Total Rf Heliport 40.070801 -74.933601 11.0 NaN US US-PA Bensalem no 00A NaN 00A NaN NaN NaN 9 322658 00CN heliport Kitchen Creek Helibase Heliport 32.727374 -116.459742 3350.0 NaN US US-CA Pine Valley no 00CN NaN 00CN NaN NaN NaN 12 6532 00FD heliport Ringhaver Heliport 28.846600 -82.345398 25.0 NaN US US-FL Riverview no 00FD NaN 00FD NaN NaN NaN 15 6535 00GE heliport Caffrey Heliport 33.884201 -84.733902 957.0 NaN US US-GA Hiram no 00GE NaN 00GE NaN NaN NaN 16 6536 00HI heliport Kaupulehu Heliport 19.832715 -155.980233 43.0 NaN US US-HI Kailua-Kona no 00HI NaN 00HI NaN NaN NaN ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... 56988 312611 ZA-0117 heliport Pumba Helipad -33.387799 26.410595 NaN AF ZA ZA-EC Nelson Mandela Bay no NaN NaN NaN http://www.pumbagamereserve.co.za/amenities NaN NaN 56990 312624 ZA-0119 heliport Cape Town Heliport -33.901018 18.425936 NaN AF ZA ZA-WC NaN no NaN NaN NaN NaN NaN NaN 57011 318475 ZA-0140 heliport Kuruman Hospital Heliport -27.459904 23.443762 NaN AF ZA ZA-NC Kuruman no NaN NaN NaN NaN NaN NaN 57108 301278 ZIZ heliport Zamzama Heliport 26.710944 67.667250 128.0 AS PK PK-SD Zamzama Gas Field no NaN ZIZ NaN NaN NaN NaN 57373 46378 ZZ-0001 heliport Sealand Helipad 51.894444 1.482500 40.0 EU GB GB-ENG Sealand no NaN NaN NaN http://www.sealandgov.org/ https://en.wikipedia.org/wiki/Principality_of_... Roughs Tower Helipad
12031 rows × 18 columns
select *
from airports
where type not in ( 'heliport' , 'balloonport' );
airports[ ~ airports.type.isin([ 'heliport' , 'balloonport' ])]
id ident type name latitude_deg longitude_deg elevation_ft continent iso_country iso_region municipality scheduled_service gps_code iata_code local_code home_link wikipedia_link keywords 1 323361 00AA small_airport Aero B Ranch Airport 38.704022 -101.473911 3435.0 NaN US US-KS Leoti no 00AA NaN 00AA NaN NaN NaN 2 6524 00AK small_airport Lowell Field 59.949200 -151.695999 450.0 NaN US US-AK Anchor Point no 00AK NaN 00AK NaN NaN NaN 3 6525 00AL small_airport Epps Airpark 34.864799 -86.770302 820.0 NaN US US-AL Harvest no 00AL NaN 00AL NaN NaN NaN 4 6526 00AR closed Newport Hospital & Clinic Heliport 35.608700 -91.254898 237.0 NaN US US-AR Newport no NaN NaN NaN NaN NaN 00AR 5 322127 00AS small_airport Fulton Airport 34.942803 -97.818019 1100.0 NaN US US-OK Alex no 00AS NaN 00AS NaN NaN NaN ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... 57370 27244 ZYYJ medium_airport Yanji Chaoyangchuan Airport 42.882801 129.451004 624.0 AS CN CN-22 Yanji yes ZYYJ YNJ NaN NaN https://en.wikipedia.org/wiki/Yanji_Chaoyangch... NaN 57371 317861 ZYYK medium_airport Yingkou Lanqi Airport 40.542524 122.358600 0.0 AS CN CN-21 Yingkou yes ZYYK YKH NaN NaN https://en.wikipedia.org/wiki/Yingkou_Lanqi_Ai... NaN 57372 32753 ZYYY medium_airport Shenyang Dongta Airport 41.784401 123.496002 NaN AS CN CN-21 Shenyang no ZYYY NaN NaN NaN NaN NaN 57374 307326 ZZ-0002 small_airport Glorioso Islands Airstrip -11.584278 47.296389 11.0 AF TF TF-U-A Grande Glorieuse no NaN NaN NaN NaN NaN NaN 57375 313629 ZZZZ small_airport Satsuma Iōjima Airport 30.784722 130.270556 338.0 AS JP JP-46 Mishima-Mura no RJX7 NaN RJX7 NaN http://wikimapia.org/6705190/Satsuma-Iwo-jima-... SATSUMA,IWOJIMA,RJX7
45345 rows × 18 columns
GROUP BY, COUNT, SUM, ORDER BY
select iso_country,
type ,
count ( * )
from airports
group by iso_country,
type
order by iso_country,
type ;
airports.groupby([ 'iso_country' , 'type' ]).size()
iso_country type
AD heliport 2
AE closed 1
heliport 21
large_airport 4
medium_airport 7
...
ZM small_airport 94
ZW closed 2
large_airport 1
medium_airport 8
small_airport 128
Length: 850, dtype: int64
select iso_country,
type ,
count ( * )
from airports
group by iso_country,
type
order by iso_country,
count ( * ) desc ;
airports.groupby([ 'iso_country' , 'type' ]).size() \
.to_frame( 'count' ).reset_index() \
.sort_values([ 'iso_country' , 'count' ], ascending = [ True , False ])
iso_country type count 0 AD heliport 2 2 AE heliport 21 6 AE small_airport 16 4 AE medium_airport 7 3 AE large_airport 4 ... ... ... ... 843 ZM large_airport 1 849 ZW small_airport 128 848 ZW medium_airport 8 846 ZW closed 2 847 ZW large_airport 1
850 rows × 3 columns
이건 좀 더 봐야겠다
airports.groupby([ 'iso_country' , 'type' ]).sum()
id latitude_deg longitude_deg elevation_ft iso_country type AD heliport 360076 85.057431 3.052711 3450.0 AE closed 42256 23.298100 54.221699 0.0 heliport 6389256 519.481986 1140.423719 706.0 large_airport 316020 99.910756 220.694089 375.0 medium_airport 347052 172.974312 384.824277 1406.0 ... ... ... ... ... ... ZM small_airport 11283607 -1294.718014 2640.091073 255681.0 ZW closed 654955 -34.870921 54.895665 6779.0 large_airport 3005 -17.931801 31.092800 4887.0 medium_airport 24058 -151.194701 234.847004 26744.0 small_airport 19701085 -2384.126237 3832.335854 273732.0
850 rows × 4 columns
HAVING
select type ,
count ( * )
from airports
where iso_country = 'US'
group by type
having count ( * ) > 1000
order by count ( * ) desc ;
s = airports[airports.iso_country == 'US' ] \
.groupby( 'type' ).filter( lambda g: len (g) > 1000 ) \
.groupby( 'type' ).size() \
.sort_values( ascending = False )
type (s)
pandas.core.series.Series
s
type
small_airport 13565
heliport 6316
closed 1930
dtype: int64
s.heliport
6316
Top N records
select iso_country
from by_country
order by size desc
limit 10 ;
by_country = airports.groupby([ 'iso_country' ]).size()\
.to_frame( 'airport_count' ).reset_index()
df = by_country.nlargest( 10 , columns = 'airport_count' )
df
iso_country airport_count 224 US 23260 29 BR 4987 35 CA 2796 12 AU 2019 152 MX 1405 117 KR 1374 74 GB 1203 184 RU 1121 54 DE 957 72 FR 894
select iso_country
from by_country
order by size desc
limit 10
offset 10 ;
by_country.nlargest( 10 , columns = 'airport_count' ).tail( 5 )
iso_country airport_count 117 KR 1374 74 GB 1203 184 RU 1121 54 DE 957 72 FR 894
Aggregate functions (MIN, MAX, MEAN)
select max (length_ft),
min (length_ft),
avg (length_ft),
median(length_ft) -- 사용자정의 함수로 만들었다 치고
from runways;
runways.head( 3 )
id airport_ref airport_ident length_ft width_ft surface lighted closed le_ident le_latitude_deg le_longitude_deg le_elevation_ft le_heading_degT le_displaced_threshold_ft he_ident he_latitude_deg he_longitude_deg he_elevation_ft he_heading_degT he_displaced_threshold_ft 0 269408 6523 00A 80.0 80.0 ASPH-G 1 0 H1 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 1 255155 6524 00AK 2500.0 70.0 GRVL 0 0 N NaN NaN NaN NaN NaN S NaN NaN NaN NaN NaN 2 254165 6525 00AL 2300.0 200.0 TURF 0 0 01 NaN NaN NaN NaN NaN 19 NaN NaN NaN NaN NaN
runways.describe()
id airport_ref length_ft width_ft lighted closed le_latitude_deg le_longitude_deg le_elevation_ft le_heading_degT le_displaced_threshold_ft he_latitude_deg he_longitude_deg he_elevation_ft he_heading_degT he_displaced_threshold_ft count 41639.000000 41639.000000 41443.000000 38981.000000 41639.000000 41639.000000 14449.000000 14444.000000 12235.000000 14228.000000 2819.000000 14434.000000 14436.000000 12118.000000 16043.000000 3129.000000 mean 257324.082183 34094.595163 3262.173684 110.520125 0.260741 0.015562 31.197196 -40.312925 1060.108868 104.374123 534.013480 31.194157 -40.256715 1054.425400 261.749082 536.128476 std 21373.781124 68108.310531 2735.615483 231.510362 0.439044 0.123776 23.021831 79.858731 1465.765774 57.292638 471.867077 23.019014 79.844299 1458.619056 78.508982 466.220434 min 232758.000000 2.000000 0.000000 0.000000 0.000000 0.000000 -75.597200 -178.303000 -1246.000000 0.000000 1.000000 -75.595400 -178.292000 -1210.000000 0.000000 1.000000 25% 243178.500000 8543.500000 1700.000000 59.000000 0.000000 0.000000 28.796800 -96.667350 117.000000 57.000000 229.500000 28.788800 -96.642750 115.000000 226.000000 228.000000 50% 253593.000000 18692.000000 2743.000000 75.000000 0.000000 0.000000 37.591200 -80.403000 571.000000 98.700000 400.000000 37.579950 -80.368750 565.500000 270.000000 400.000000 75% 264015.500000 27430.500000 4241.000000 100.000000 1.000000 0.000000 44.277400 15.322125 1247.000000 148.000000 685.000000 44.272275 15.366575 1237.000000 316.000000 699.000000 max 335591.000000 335590.000000 120000.000000 9000.000000 1.000000 1.000000 82.512800 179.337000 13202.000000 360.000000 6365.000000 82.522800 179.343000 13314.000000 363.000000 5001.000000
# df = runways.agg({'length_ft': ['min', 'max', 'mean', 'median', 'count', 'std']})
df = runways.agg({ 'airport_ref' : [ 'min' , 'max' , 'mean' , 'median' , 'count' , 'std' ]})
df
airport_ref min 2.000000 max 335590.000000 mean 34094.595163 median 18692.000000 count 41639.000000 std 68108.310531
df.T
min max mean median count std airport_ref 2.0 335590.0 34094.595163 18692.0 41639.0 68108.310531
JOIN
select airport_ident,
a . type ,
a . description ,
frequency_mhz
from airport_freq as a join airports as b
on airport_freq . airport_ref = airports . id
where airports . ident = 'KLAX'
airport_freq.merge(
airports[airports.ident == 'KLAX' ][[ 'id' ]],
left_on = 'airport_ref' ,
right_on = 'id' ,
how = 'inner'
)[[ 'airport_ident' , 'type' , 'description' , 'frequency_mhz' ]]
airport_ident type description frequency_mhz 0 KLAX APP SOCAL APP 36.07 1 KLAX APP SOCAL APP 124.30 2 KLAX ATIS ATIS 133.80 3 KLAX CLD CLNC DEL 121.40 4 KLAX DEP SOCAL DEP 124.30 5 KLAX GND GND 121.65 6 KLAX MISC CG 34.50 7 KLAX MISC CG 898.40 8 KLAX OPS AF 37.22 9 KLAX TWR TWR 119.80 10 KLAX UNIC UNICOM 122.95
pd.merge(
airport_freq,
airports[airports.ident == 'KLAX' ][[ 'id' ]],
left_on = 'airport_ref' ,
right_on = 'id' ,
how = 'inner'
)[[ 'airport_ident' , 'type' , 'description' , 'frequency_mhz' ]]
airport_ident type description frequency_mhz 0 KLAX APP SOCAL APP 36.07 1 KLAX APP SOCAL APP 124.30 2 KLAX ATIS ATIS 133.80 3 KLAX CLD CLNC DEL 121.40 4 KLAX DEP SOCAL DEP 124.30 5 KLAX GND GND 121.65 6 KLAX MISC CG 34.50 7 KLAX MISC CG 898.40 8 KLAX OPS AF 37.22 9 KLAX TWR TWR 119.80 10 KLAX UNIC UNICOM 122.95
UNION ALL and UNION
select name ,
municipality
from airports
where ident = 'KLAX'
union all
select name ,
municipality
from airports
where ident = 'KLGB' ;
airports[airports.ident == 'KLAX' ][[ 'name' , 'municipality' ]]
name municipality 29141 Los Angeles International Airport Los Angeles
airports[airports.ident == 'KLGB' ][[ 'name' , 'municipality' ]]
name municipality 29166 Long Beach /Daugherty Field/ Airport Long Beach
pd.concat([
airports[airports.ident == 'KLAX' ][[ 'name' , 'municipality' ]],
airports[airports.ident == 'KLGB' ][[ 'name' , 'municipality' ]]
])
name municipality 29141 Los Angeles International Airport Los Angeles 29166 Long Beach /Daugherty Field/ Airport Long Beach
INSERT
create table heroes (id integer , name text );
insert into heroes values ( 1 , 'Harry Potter' );
insert into heroes values ( 2 , 'Ron Weasley' );
insert into heroes values ( 3 , 'Hermione Granger' );
df1 = pd.DataFrame({ 'id' : [ 1 , 2 ], 'name' : [ 'Harry Potter' , 'Ron Weasley' ]})
df1
id name 0 1 Harry Potter 1 2 Ron Weasley
df2 = pd.DataFrame({ 'id' : [ 3 ], 'name' : [ 'Hermione Granger' ]})
df2
pd.concat([df1, df2]).reset_index( drop = True )
id name 0 1 Harry Potter 1 2 Ron Weasley 2 3 Hermione Granger
UPDATE
update airports
set home_link = 'http://www.lawa.org/welcomelax.aspx'
where ident == 'KLAX' ;
airports[airports[ 'ident' ] == 'KLAX' ][[ 'home_link' ]]
# airports[airports.ident == 'KLAX'][['home_link']]
home_link 29141 https://www.flylax.com/
airports.loc[airports[ 'ident' ] == 'KLAX' , 'home_link' ]
29141 https://www.flylax.com/
Name: home_link, dtype: object
airports.loc[airports[ 'ident' ] == 'KLAX' , 'home_link' ] = 'http://www.lawa.org/welcomelax.aspx'
airports.loc[airports[ 'ident' ] == 'KLAX' , 'home_link' ]
29141 http://www.lawa.org/welcomelax.aspx
Name: home_link, dtype: object
airports.loc[airports.type == 'heliport' , 'home_link' ] = 'http://haha'
airports[airports.type == 'heliport' ][[ 'ident' , 'name' , 'home_link' ]]
ident name home_link 0 00A Total Rf Heliport http://haha 9 00CN Kitchen Creek Helibase Heliport http://haha 12 00FD Ringhaver Heliport http://haha 15 00GE Caffrey Heliport http://haha 16 00HI Kaupulehu Heliport http://haha ... ... ... ... 56988 ZA-0117 Pumba Helipad http://haha 56990 ZA-0119 Cape Town Heliport http://haha 57011 ZA-0140 Kuruman Hospital Heliport http://haha 57108 ZIZ Zamzama Heliport http://haha 57373 ZZ-0001 Sealand Helipad http://haha
12007 rows × 3 columns
Immutability
airports.home_link = 'http//hoho' # work
airports.home_link
0 http//hoho
1 http//hoho
2 http//hoho
3 http//hoho
4 http//hoho
...
57371 http//hoho
57372 http//hoho
57373 http//hoho
57374 http//hoho
57375 http//hoho
Name: home_link, Length: 57376, dtype: object
airports[airports[ 'ident' ] == 'KLAX' ].home_link = 'http//haha' # not work
airports[airports[ 'ident' ] == 'KLAX' ].home_link
d:\bin\miniconda3\envs\blog\lib\site-packages\pandas\core\generic.py:5159: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
self[name] = value
29141 http//hoho
Name: home_link, dtype: object
DELETE
delete from airport_freq
where type = 'MISC' ;
# 다시 깨끗하게 로딩하자
airport_freq = pd.read_csv( 'https://ourairports.com/data/airport-frequencies.csv' )
airport_freq[airport_freq.type != 'MISC' ]
id airport_ref airport_ident type description frequency_mhz 0 70518 6528 00CA CTAF CTAF 122.90 1 307581 6589 01FL ARCAL NaN 122.90 2 75239 6589 01FL CTAF CEDAR KNOLL TRAFFIC 122.80 3 60191 6756 04CA CTAF CTAF 122.90 4 59287 6779 04MS UNIC UNICOM 122.80 ... ... ... ... ... ... ... 28903 51247 27242 ZYTL ATIS ATIS 126.65 28904 51248 27242 ZYTL TWR DALIAN TWR 118.25 28905 51243 27243 ZYTX ATIS ATIS 127.45 28906 51244 27243 ZYTX TWR SHENYANG TWR 118.10 28907 51190 27244 ZYYJ TWR TWR 130.00
26683 rows × 6 columns
airport_freq.drop(airport_freq[airport_freq.type == 'MISC' ].index)
id airport_ref airport_ident type description frequency_mhz 0 70518 6528 00CA CTAF CTAF 122.90 1 307581 6589 01FL ARCAL NaN 122.90 2 75239 6589 01FL CTAF CEDAR KNOLL TRAFFIC 122.80 3 60191 6756 04CA CTAF CTAF 122.90 4 59287 6779 04MS UNIC UNICOM 122.80 ... ... ... ... ... ... ... 28903 51247 27242 ZYTL ATIS ATIS 126.65 28904 51248 27242 ZYTL TWR DALIAN TWR 118.25 28905 51243 27243 ZYTX ATIS ATIS 127.45 28906 51244 27243 ZYTX TWR SHENYANG TWR 118.10 28907 51190 27244 ZYYJ TWR TWR 130.00
26683 rows × 6 columns