Basic select queries
query: retrieve/extract data
but sometimes more generally: any data manipulation operation, e.g. insert/update/delete "queries" 
   (which often involve an implicit or explicit selection)

result of querying a table is a table.
A table can be queried to produce a table that can be queried to
  produce a table that can be queried...
The resultset table can be stored back in the DB as a table.

ad-hoc query: on-the-fly interactively, spur-of-the-moment, one-time-use.
programmatic: embedded in a program/script/procedure.

SQL is declarative/nonprocedural (What is wanted, not How to do it)

**********************
Basic SELECT statement:

SELECT values,to,display,columns,constants,expressions
 FROM source_table(s)
 WHERE row_specifying_expression/restriction/filter
 GROUP BY how_to_group
  HAVING expresion
 ORDER BY how_to_sort
 LIMIT row_count;

--all clauses are optional
--if a clause is used, it must be in this order

--evaluate expressions (no retrieval from any table)
SELECT Sqrt(2), Now(), 1/0, Round(123.456,2), Length('asdfqwerty'), Left('asdfqwerty',4);

Usually, query a table: 
SELECT * FROM t                --all columns, in declared order    t.*
SELECT f1,f2 ...               --fields f1 and f2 only of selected rows
SELECT DISTINCT f1...          --unique f1 values
SELECT DISTINCT f1,f2...       --unique f1,f2 combos

--Limit number of rows returned. esp. useful on Windows, w/out pager.
  reduce network traffic.
SELECT ... LIMIT 5;
           LIMIT skip,count       #arbitrary section of result set

--sorted order
SELECT ... ORDER BY f1
SELECT ... ORDER BY f1 DESC    --descending order
SELECT ... ORDER BY f1,f2      --primary and secondary sorts
SELECT ... ORDER BY f1 DESC,f2  --primary descending, secondary ascending
  ??cannot Order By Count(*)   [use Alias]  [no, now you can sort by Count(*)]
  can force non-binary Char/Varchar column fi to sort binary: 
      Order By BINARY fi
  Enum values sort by their internal numeric value (position in list),
      Order By CAST(enum_col AS CHAR)   for string sort.
  Set values sort based on subset's bitmap value (combo of members).
  Nulls sort as smallest
select ... Order By f1 Limit 1;          #smallest f1 row
select ... Order By f1 Desc Limit 1;     #largest f1 row
The Order By value doesn't have to be in the column list.
The Order By value can be a derived value.

--WHERE clause filters rows
SELECT ... FROM t WHERE f1 relOp value [AND|OR...]]
  relOp: < <= > >=  = (single =) !=  (<> too)    
 select * from City where Population>=5000000 order by CountryCode,Population desc;
              
--pattern matching:
  WHERE f1 [NOT] LIKE [BINARY] 'string'        
  case-insensitive unless BINARY
  'string' contains pattern matching/wildcard operators:
                   % 0 or more chars,  
                   _ single char:
                       "j%"	#starts with j
		       "%j%"    #contains j   
                       "j____"  #5 chars, starts with j
  Like can be used with non-strings:  birth LIKE '20%'  #dates starting 20..

--testing for Null.  = and != cannot be used
  WHERE f1 IS [NOT] NULL
 select Name from Country where IndepYear is null;

--range test
  WHERE f1 [NOT] BETWEEN x AND y         equivalent to: f1>=x AND f1<=y
 select Name,IndepYear from Country where IndepYear between 1800 and 1899;

--set membership
  WHERE f1 [NOT] IN (val1,val2,...)      equivalent to: f1=val1 OR f1=val2 OR...
                     vals can be columns or expressions too

--alternate/alias column name.   
 if can't/don't want to change field name OR for derived column (see below)
SELECT f1 AS newalias, f2 AS meaningfulname FROM t 
    [ORDER BY meaningfulname]
  alias can not be used in WHERE clause
  AS is optional
  ' " ` quoting if is keyword, contains non-alphanumeric chars, or is all-digits.

--calculated/derived values/columns 
SELECT f1*f2+sqrt(f3),f4-2*f5  FROM t....
 select Name,SurfaceArea,Population,Population/SurfaceArea as Density from Country order by Density desc;
 select Name,round(GNP*1000000) as GNP,Population,round(GNP*1000000/Population) as GNP_per_capita 
   from Country order by GNP_per_capita desc;

--counts:
SELECT COUNT(*) ...          --number of rows
SELECT COUNT(f1) ...	     --number of non-null f1 values
SELECT COUNT(DISTINCT f1)... --number of different non-null f1 values
SELECT COUNT(DISTINCT f1,f2)...	     --number of non-null f1,f2 combos
 select count(*),count(Name),count(distinct Name) from City;

--aggregate/column functions: summarize an entire column
SELECT SUM(f1)...
       AVG(f1)...
       MIN(f1)...
       MAX(f1)...
Also:  Variance(f1)   Stddev(f1)  Group_concat(f1)
  can be derived column/expression:  Sum(100*f1/f2)
  Nulls ignored.
  cannot be in Where clause:  !! select * from pet where birth=max(birth);
  if result set is empty or only nulls, aggregate function is Null (count is 0)
 select min(weight),max(weight),avg(weight),count(weight) from pet;
 select count(*),min(Population),max(Population),sum(Population),avg(Population) from City;

--grouping/subtotals.  summarize column per group of rows (or combos).
  output is sorted by the grouping column(s)
SELECT f1,aggFunc(f2) FROM t GROUP BY f1
SELECT f1,f2,aggFunc(f3) FROM t GROUP BY f1,f2
 select species,count(*) from pet group by species;
 select species,count(*),avg(weight) from pet group by species;
 select species,sex,count(*) from pet group by species,sex;
 select species,sex,avg(weight) from pet group by species,sex;

 select GROUP_CONCAT(Language) From CountryLanguage Where Countrycode="USA"
 select countrycode,GROUP_CONCAT(Language) From CountryLanguage Group By countrycode;
 select species,group_concat(name) from pet group by species;
 select species,group_concat(distinct owner order by owner separator ' ' ) 
   from pet group by species;
 select owner,group_concat(name, weight) from pet group by owner;


--cannot mix column and aggregate function in select list without Group By:
  //syntax error? or now rubbish:  !! select f1,aggFunc(f2) from t    
--unpredictable/unreadable if display columns not in Group By:
  !! select f1,aggregateFunction(f2) from t group by f3
  !! select f1,f4,aggregateFunction(f2) from t group by f1
 #smallest city of each country: (max looks right because each state's cities are in population order!)
 !! select CountryCode,Name,min(Population) from City  group by CountryCode;
 #smallest city of each USA state: 
 !! select District,Name,min(Population) from City  where CountryCode='USA' group by District;
 These need to be done using subquery (see below)

--frequencies of each different f1 value
SELECT f1,COUNT(*) FROM t GROUP BY f1
 #number of cities of each country:
 select CountryCode,count(*) from City group by CountryCode  order by count(*) desc;
 #number of countries of each language:
 select Language,count(*) from CountryLanguage group by Language order by count(*) desc;
--frequency of each f1,f2 combo
SELECT f1,f2,COUNT(*) FROM t GROUP BY f1,f2    

--grouping without any aggregate function is equivalent to Distinct
Select Distinct fi From t
Select fi From t Group By fi
  select distinct Continent from Country;
  select Continent from Country group by Continent;


--HAVING clause modifies Group By clause (further constrains the
  groups to those with particular summary characteristics)
SELECT f1,aggfunc(f2) FROM t GROUP BY f1 HAVING aggfunc(f2) relOp value
  can be different aggregate functions.
find duplicate fi's:
SELECT fi... FROM t GROUP BY fi HAVING COUNT(*)>1
 select species,count(*) from pet group by species having count(*)>2;
 select species from pet group by species having avg(weight)>20;
 select owner from pet group by owner having count(*)=1;
 #species of exactly 2 pets:
 select species,count(*) from pet group by species having 2=count(*);
Having doesn't have to have an aggregate function, 
 it can be a restriction on the Group By column:
  select species,count(*) from pet group by species having species!='snake';

 #city names occuring more than once:
 select Name,CountryCode,count(*) from City group by Name having count(*)>1;
 #'countries' that have only one city:
 select CountryCode,Name from City group by CountryCode having count(*)=1;

HAVING and WHERE sometimes interchangable in GROUP BY if the condition does not
  involve an aggregate function and does involve the grouping column:
SELECT ... FROM t GROUP BY fi HAVING condition
SELECT ... FROM t WHERE condition GROUP BY fi
 select species,count(*) from pet where species='cat' or species='dog' group by species; 
 select species,count(*) from pet group by species having species='cat' or species='dog';
More efficient to use the Where version; it eliminates rows earlier.


--WITH ROLLUP  includes summary (subtotal) for each group (i.e. super-aggregate).
Multiple levels of analysis with single query:  OLAP support.
one grouping, get summary of it at end:
SELECT f1,aggFunc(f2) FROM t GROUP BY f1 WITH ROLLUP
two groupings, get summary of each combo and total summary at end:
SELECT f1,f2,aggFunc(f3) FROM t GROUP BY f1,f2 WITH ROLLUP
 limitations: Order By cannot be used (but can use Group By ..DESC),
              Limit applied after Rollup,
	      the Nulls in the summary rows cannot be filtered.              
 select species,sex,count(*) from pet group by species,sex  with rollup;
 select species,sex,sum(weight) from pet group by species,sex  with rollup;
 #USA's states' #cities and total population:
 select District,count(*),sum(Population) from City where CountryCode='USA' group by District with rollup;
 #USA's states' cities' population with each state's #cities and total population:
 select District,Name,count(*),sum(Population) from City where CountryCode='USA' 
   group by District,Name  with rollup;
 #count and population of each district of each country:
 select CountryCode,District,count(*),sum(Population) from City  group by CountryCode,District with rollup;

the rollup value(s) are calculated on every row, not the grouped by values:


***********************************************************************
Subqueries/nested queries.
--a Select within another statement (Select, Insert, Update, Delete, Create).
--is why SQL is "structured" query language.

SELECT .. FROM .. WHERE f1 relOp (SELECT f2 FROM .. [WHERE..][GROUP BY..[HAVING..]])
  parens required around inner Select, which runs before outer Select.

Usually, subquery produces one column, thus it has only one Select item (f2),
   which is often an aggregate function:
  SELECT .. FROM .. WHERE f1 relOp (SELECT aggFunc(f2) FROM...)

Recall that an Aggregate function can not be in Where clause:
  !! select * from pet where dob=max(dob);
So use subquery: Row holding maximum of certain field:
 select * from pet where dob=(select max(dob) from pet);
 select * from pet where dob=(select max(dob) from pet where species='cat');
Note these can be done without subquery by using Order By and Limit 1:
 select * from pet order by dob desc limit 1;

 #row of largest weight either subquery OR sort and first:
 select * from pet order by weight desc limit 1; 
 select * from pet where weight=(select max(weight) from pet); 

 select * from City where Population=(select max(Population) from City);
 select * from City order by Population desc limit 1;

These can not be done without subquery (or two separate queries):
Rows relOp an aggregate value:
 select * from pet where dob>(select avg(dob) from pet);
 #pets of the most common species:
 select * from pet where species=(select species from pet group by species order by count(*) desc limit 1);
 #cities whose population is > 10 times the average city poplation:
 select * from City where Population>10*(select avg(Population) from City);
Rows relOp a particular row's column value:
 select * from pet where dob>(select dob from pet where name='claws');
 select * from pet where cuteness>(select max(cuteness) from pet group by owner having owner='Gwen');
 #countries whose independance preceded USA's:
 select Name,IndepYear from Country where IndepYear<(select IndepYear from Country where Code='USA');
 #pets that are > 25% of total pets weight:
 select * from pet where weight>.25*(select sum(weight) from pet);
 #countries whose population is > 1% of the world's population:
 select Name,Population from Country where Population>.01*(select sum(Population) from Country);

 #the countries that have the triplicate+ city names:  slow!
 select CountryCode,Name from City where Name in (select Name from City group by Name having count(*)>2);

Rows holding group-wise maximum/minimum of a certain field.
 #least cute pet of each owner: (WRONG. name is not being Grouped By, so it will be mush):
 !! select owner,name,min(cuteness) from pet  group by owner;
 #smallest city of each country: (max looks right because each state's cities are in population order!)
 !! select CountryCode,Name,min(Population) from City  group by CountryCode;
 #smallest city of each USA state: 
 !! select District,Name,min(Population) from City  where CountryCode='USA' group by District;

 Instead, must do this:
 #each owner's cutest pet:
 select * from pet as p1 
   where cuteness=(select max(cuteness) from pet as p2 where p1.owner=p2.owner);
 #countries of largest surface area per continent:
 select Continent,Name,SurfaceArea from Country as c1 
   where SurfaceArea=(select max(SurfaceArea) from Country as c2 where c1.Continent=c2.Continent);
 Note that: select Continent,Name,max(SurfaceArea) from Country group by Continent;
  will not have the correct Name because Name is not being Grouped By.

 
"Correlated" subquery contains reference to outer query value.
this kind of subquery cannot run by itself(?) runs for each outer query row?

 #species that occur exactly twice:  ??? WRONG SKIP
 select * from pet as t where 2=(select count(*) from pet where pet.species=t.species);

values that are [not] in another table:
SELECT .. FROM t1 WHERE f1 [NOT] IN (SELECT f2 FROM t2)
 select * from pet where name not in (select name from events);

?? Cannot have Order By in subquery (would be useless anyway, since result is not seen).
 Now not an error to have Order By in subquery, but is still useless.

  subquery can be another table:
SELECT .. FROM t1 WHERE fi relOp (SELECT .. FROM t2 ..)
 select * from pet where birth<(select min(date) from events);

  the result of a  subquery that returns 0 values is Null.
  comparing anything against null is Null:
SELECT .. FROM t1 WHERE fi = (subquery is empty set)
 select owner from pet where name in (select name from events where type='lobotomy');

subquery can return scalar, one column, one row, or table.

scalar subquery can be used wherever a scalar can be used.
 select (select sum(Population) from City) / (select sum(Population) from Country);

Quantified comparisons: column subquery of several rows
ALL  if comparison is true for every value of subquery
ANY|SOME     .. ..             >=1   ....

 #countries whose population is more than every continents average country population:
 select Name, Population from Country where Population>ALL(select avg(Population ) from Country group by Continent);
 #European countries where Spanish is spoken:
 select Name from Country where Code=ANY(select CountryCode from CountryLanguage where Language ='spanish') 
  and Continent='europe';

=ANY(subquery) equivalent to IN(subquery)
<>ANY(subquery) not equivalent to NOT IN(subquery)
<>ALL(subquery) equivalent to NOT IN(subquery)

EXISTS(subquery) returns true if any results, false if none.  no data returned.
 #European countries where Spanish is spoken, again:
 select Code as c, Name from Country 
  where EXISTS(select * from CountryLanguage where CountryCode=c and Language ='spanish') 
   and Continent='europe' ;


subquery can be in Select clause:
SELECT fi,fj*(SELECT Avg(fk) FROM t) FROM t
 select name,weight,weight-(select avg(weight) from pet) from pet;

  subquery can be in From clause (inline view). temporary result table created:
  alias is required because interim result table is referenced by outer query:
SELECT * FROM (SELECT * FROM t WHERE ...) AS temp...   ??
#average continental population:
select avg(contPop) from (select Continent, sum(Population) as contPop from Country group by Continent) as t;
#languages spoken in only one country:
select count(*) from (select count(*) from countrylanguage group by language having count(*)=1) as temp;
  
subquery gets the table definition for a Create Table:
CREATE TABLE t2 SELECT * FROM t1 WHERE 0;  #but not t1's indexes, auto_increment
CREATE TABLE t2 SELECT * FROM t1;	   #copy data too

  subquery gets the rows from another table to insert into a table:
INSERT INTO t SELECT ... FROM t2...;   #no parens
 insert into t select name,weight from pet;  #

  subquery used in Update:  must be from different table
UPDATE t SET fi=expr WHERE fj relOp (SELECT .. FROM t2)  #
  update pet set weight=weight+100 where birth<(select avg(eventdate)
                                                from events)

  subquery used in Delete: must be from different table
DELETE FROM t WHERE fi relOp (SELECT .. FROM t2)
  delete from pet where birth<(select avg(eventdate) from events)


subquery as simpler, more readable, easier to maintain than equivalent complex join or union ?
join often more efficient?

***********************************************************************

Joining multiple tables
two (or more) tables are joined together, typically on common fields e.g. PK and FK.
"Heart of SQL", "the real power behind the relational database".
Form pairs of rows by matching the contents of related columns.
Joins exercise the data relationships between tables. Are the only way
 to exercise cross-table relationships.

"inner join": only returns matched rows. 
  "natural join": selects only the rows with common values in the common attribute.
  "equijoin": the joining condition explicitly stated, equality of any columns, duplicates not eliminated.
  "theta join": like equijoin but inequality <,> etc. Uncommon.

SELECT ... FROM t1,t2 WHERE t1.fi=t2.fj ...
The From clause lists the tables being joined.  The Where clause
 specifies the matching condition, and possibly further restrictions.
Alternate syntaxes:
SELECT ... FROM t1 INNER JOIN t2 ON fi=fj ...
SELECT ... FROM t1 INNER JOIN t2 USING(fi) ...  
  #if joining columns fi have same name in both tables.  also, don't need to disambiguate.

order of t1,t2 makes no difference.

Any columnname that's in both tables must be qualified by tablename to
 disambiguate it:  t1.fi  if there's a fi in t2 too.
Sometimes qualifiers helpful for readability.

Any pair of columns from the two tables can serve as matching columns,
 provided they have comparable data types but most often the common
 fields are the primary key of "parent" table and foreign key in the
 "child" table that references that primary key.  One-to-many, parent/child relationship.
SELECT ...FROM parent,child WHERE parent.pk=child.fk ...

[If the names are the same,?] this is sometimes called "natural join".

Without a Where (or Using etc) clause, Cartesian product is formed
 (all possible pairs/combos of rows from each of the two tables). 
N*M pairs where N and M are number of rows of the two tables, respectively.  
Cartesian product usually not useful! (it's the basis of a join)
SELECT ... FROM t1,t2;
SELECT ... FROM t1 Inner Join t2;

select count(*) from t1,t2;
select * from t1,t2 order by fj desc;
select fi,count(*) from t1,t2 group by fi;
select * from t1,t2 where fi>=2;

select count(*) from pet,event;
select * from pet,event  limit 2;   #notice name is twice
select pet.name,species,sex,type,remark from pet,event;
select * from pet,event;

A join is the Cartesian product with the rows that do not meet the matching 
 column condition removed.  A join is a subset of the Cartesian product.
Examples:
'name' is the common field of the pet and event tables.  Use it to join.
#combine the pet data into each events:
SELECT * FROM pet,events WHERE pet.name=events.name;
#don't need to disambiguate the USING column:
SELECT name,id,type FROM pet INNER JOIN event USING(name);
#sex of the pets that had litters duh
SELECT pet.name,sex,date FROM pet,events WHERE pet.name=events.name AND type='litter';
#owner's whose pets have had birthdays:
SELECT DISTINCT owner FROM pet,events WHERE pet.name=events.name AND type='birthday';
#Harold's pets' events:
SELECT pet.name,type,date FROM pet,events 
 WHERE pet.name=events.name AND owner='harold';
#age of pets on visits:
SELECT pet.name,type,DATEDIFF(date,dob)/365 AS age FROM pet,events 
 WHERE pet.name=events.name ;
#pets who visited when less than year old:
SELECT pet.name,type,dob,date FROM pet,events 
 WHERE pet.name=events.name AND DATEDIFF(date,dob)<365;

'v_code' is the link between the product and vendor tables.
#name of vendor who supplies product:
SELECT product.code,description,name FROM product,vendor 
 WHERE product.v_code=vendor.code;
#products whose vendor's reorder is 'Y':
SELECT product.code, description,name FROM product,vendor 
 WHERE reorder='y' AND product.v_code=vendor.code;
#products per state:
SELECT state,count(*) as `number of products` 
 FROM vendor, product WHERE product.v_code=vendor.code 
 GROUP BY state ORDER BY `number of products`;

#more precisely, city with same name as any country (and vice versa):
SELECT Country.Name FROM Country,City WHERE Country.Name=City.Name;
SELECT Country.Name FROM Country INNER JOIN City ON Country.Name=City.Name;
SELECT Country.Name FROM Country INNER JOIN City USING (Name);
#countries that have a city of the same name: 
SELECT Country.Name FROM Country INNER JOIN City ON Code=Countrycode WHERE Country.Name=City.Name;;

#number of languages per continent:
SELECT Continent, COUNT(Language) AS langCount 
 FROM Country, CountryLanguage 
 WHERE Code=CountryCode GROUP BY Continent;

#number of speakers of each language in each country:
SELECT Name,Language,ROUND(Population*Percentage/100) AS Speakers 
 FROM Country,CountryLanguage  
 WHERE Code=CountryCode ORDER BY Name;

#all languages spoken in countries that have Swedish speakers (join )
SELECT Name, Language, ROUND(Population*Percentage/100) AS Speakers, Percentage 
 FROM Country, CountryLanguage 
 WHERE Code=CountryCode AND CountryCode IN 
    (SELECT CountryCode FROM CountryLanguage WHERE Language='swedish') 
 ORDER BY CountryCode;

#same, but subtotals too.
SELECT Name, Language, ROUND(SUM(Population*Percentage/100)) AS Speakers, Percentage 
 FROM Country, CountryLanguage 
 WHERE Code=CountryCode AND CountryCode IN 
   (SELECT CountryCode FROM CountryLanguage WHERE Language='swedish') 
 GROUP BY Name, Language WITH ROLLUP;

**************

self-join: table joined with itself. 
If need to compare rows to other rows in the same table. 
i.e. for each row, compare it to all others in the same table for a match.
or form all pairs of rows for some processing.
(primary to foreign key not used).
 need table alias[es].

#"Cartesian product": M*M rows
select *               from pet as p1,pet as p2;   # Must have table aliases.
select p1.name,p2.name from pet as p1,pet as p2;

#name "pairs": xy but not yx, xx, yy    "M choose 2"=(M^2-M)/2=M*(M-1)/2 pairs
#i.e. all pairs of rows, but each pair only once and no "self-pairs".
#use primary key or other unique orderable column
select p1.name,p2.name from pet as p1,pet as p2 where p1.name<p2.name;

#Or, using the pet table that has an auto_increment ID primary key field:
... Where p1.id<p2.id

#species pairs:
select p1.name,p1.species,p2.name
 from pet as p1,pet as p2
 where  p1.species=p2.species and p1.name<p2.name;

#breeding pairs (same species, different sex)
SELECT p1.name,p1.species,p1.sex,p2.name,p2.species,p2.sex
 FROM pet AS p1, pet AS p2
 WHERE p1.species=p2.species AND p1.sex='m' AND p2.sex='f';

update pet set cuteness=15 where name='claws';   
#pets of equal cuteness:
Select p1.name,p2.name,p1.cuteness 
  From pet as p1,pet as p2 
  Where p1.name<p2.name and p1.cuteness=p2.cuteness;

#fighting pairs (within 30% of each other's weight)
select p1.name,p1.weight,p2.name,p2.weight
 from pet as p1,pet as p2
 where  p1.weight between .7*p2.weight and 1.3*p2.weight and
       p2.weight between .7*p1.weight and 1.3*p1.weight and
       p1.name<p2.name;

#alphabetical rank order of name
Select p1.name,count(*) as 'alphabetic rank' 
  From pet as p1,pet as p2 
  Where p1.name>=p2.name group by p1.name;

#in order by weight  with rank shown too
Select p1.name,count(*) as 'weight rank' 
  From pet as p1,pet as p2 
  Where p1.weight>=p2.weight  Group by p1.name  Order by `weight rank`;
#Or, to include a derived column that is the ordered ranking:
set @c=0;
select name,weight,@c:=@c+1 as rank from pet order by weight;
#Or, to find a particular's rank:
set @fluffyweight=(select weight from pet where name='fluffy');
select count(*) from pet where weight<=@fluffyweight;

#pairs per owner:
select p1.owner,p1.name,p2.name 
 from pet as p1,pet as p2 
 where p2.name<p1.name and p1.owner=p2.owner 
 order by p1.owner;

#number of pairs per owner:
select p1.owner,count(*) 
 from pet as p1,pet as p2 
 where p2.name<p1.name and p1.owner=p2.owner 
 group by p1.owner;

#(can be done manually, or using variable):
#countries with same independance year as Qatar:
SELECT t1.IndepYear,t1.Name,t2.Name
 FROM Country AS t1, Country AS t2
 WHERE t1.Name='qatar' AND t1.IndepYear=t2.IndepYear;

#South American countries whose surface area is greater than Paraguay's:
select c1.Name, c1.SurfaceArea from Country as c1, Country as c2 
 where c2.Name='paraguay' and c1.SurfaceArea>=c2.SurfaceArea and c1.Continent='South America' 
 order by SurfaceArea;


*************************
"outer join": 
mismatches (rows in one table not matched by any row in the other table) are included too.  
 superset of inner join.

SELECT ... FROM t1 LEFT JOIN t2 ON fi=fj...
  fi is column in t1, fj is column in t2
SELECT ... FROM t1 LEFT JOIN t2 USING(fk)...
  fk is column in both tables
left table t1 is reference table; output is produced for each of its
 rows, those with matching row(s) in the right table (the same as inner
 join) and those with no matching row(s) in the right table t2 and in
 which any selected columns of t2 will be displayed as NULL.
order of the tables matters.

SELECT p_code,p_descript,v_name 
  FROM product LEFT JOIN vendor ON product.v_code=vendor.v_code;
SELECT p_code,p_descript,v_name 
  FROM product LEFT JOIN vendor USING(v_code);

To find *only* the mismatches, use Where clause to restrict result to
 only those rows that have Nulls in right table's column(s):
SELECT p_code,p_descript,v_name 
  FROM product LEFT JOIN vendor USING(v_code) WHERE v_name IS NULL;
(this is the complement of the inner join, 
  ie. inner join + "mismatch join" = outer join)


Parent rows that are not matched by any child: i.e. childless parents.
SELECT parentFields 
  FROM parent LEFT JOIN child ON pk=fk 
  WHERE anyChildField IS NULL;
(parent parent child child)

#vendors (parents) that have no products (childs)
SELECT v_code,v_name 
 FROM vendor LEFT JOIN product USING(v_code) 
 WHERE p_descript IS NULL;                   #all the child fields will be Null

can be done with subquery instead of join:
SELECT v_code,v_name FROM vendor 
  WHERE v_code NOT IN (SELECT v_code FROM product WHERE v_code IS NOT NULL);
(nulls contaminate)
#note that qualifying code not needed because only vendor table is used in outer select.

**************
Find referential integrity problems of imported data: left table's foreign key does 
 not match primary key of any row of right table. i.e. orphan child (or Null).

SELECT childFields 
  FROM child LEFT JOIN parent ON fk=pk 
  WHERE anyParentField IS NULL;
(child child parent parent)

#product v_codes that are invalid (i.e. not in vendor table) or are Null
SELECT p_code,p_descript 
 FROM product LEFT JOIN vendor USING(v_code) 
 WHERE v_name IS NULL;

#To disclude Null children:
SELECT p_code,p_descript 
 FROM product LEFT JOIN vendor USING(v_code) 
 WHERE v_name IS NULL AND v_code IS NOT NULL;



**************
Right join is reverse of left join, interconvertible:
SELECT ... FROM t2 RIGHT JOIN t1 ON fi=fj...


Matches can be found with either inner or outer join (restricting to rows of right 
 table column(s) that are non-Null) but inner join might be more efficient.


Convert subquery of other table to join:
inner join:
Select fi From t1 Where fj IN (Select ci From t2);
Select DISTINCT fi From t1,t2 Where fj=ci;
Select DISTINCT fi From t1 Inner Join t2 On fj=ci;

#South american countries whose surface areas are greater than Paraguay's:
select Name, SurfaceArea from Country 
 where SurfaceArea>=(select SurfaceArea from Country where Name ='paraguay') 
  and Continent='South America' order by SurfaceArea;
select c1.Name, c1.SurfaceArea from Country as c1, Country as c2 
 where c2.Name='paraguay' and c1.SurfaceArea>=c2.SurfaceArea and c1.Continent='South America' 
 order by SurfaceArea;


outer join:
Select fi From t1 Where fj NOT IN (Select ci From t2);
Select fi From t1 Left Join t2 On fj=ci Where ci Is Null;



***********************************************************************

Union.
concatenate results of several Selects, from multiple tables or one
  table for which the criteria are hard to do with single Where.
SELECT .. UNION SELECT .. UNION SELECT .. ...
must be same number and types of columns from each Select.
result column names from names (or aliases) of first Select.
result column types are widest of any Select.
Order By and Limit applicable to result

select subscriber,email from t1
 union select name,address from t2
 union select real_name,email from t3
 order by email limit 10;

duplicate rows eliminated unless UNION ALL