SELECT
[DISTINCT] [* | [ expression [[AS] column_name_alias] [, ...] ] ]
[INTO #temp_table_name]
[FROM table_name]
[WHERE condition [, ...] ]
[GROUP BY expression [, ...] ]
[HAVING condition [, ...] ]
[ORDER BY expression [ASC | DESC] [, ...] ]
[LIMIT row_count]
[WITH (option_name=option_value [, ...]) ] [;]
option_name := IDENTIFIER
option_value := NUMBER | STRING
SELECT [DISTINCT] [* | [ expression [[AS] column_name_alias] [, ...] ] ]
FROM #temp_table_name
[[INNER | OUTER | CROSS] JOIN #temp_table_name [alias] ON condition
[join_statement]
[join_statement]
...
]
[WHERE condition [, ...] ]
[GROUP BY expression [, ...] ]
[HAVING condition [, ...] ]
[ORDER BY expression [ASC | DESC] [, ...] ]
[LIMIT row_count]
select_into_temp;
[select_into_temp;]
[select_into_temp;]
...
select_from_temp_table;
--some comment in single line
-- OR --
/* some block comment */
select_from_temp_table_1
UNION | UNION ALL
select_from_temp_table_2
[UNION | UNION ALL]
select_from_temp_table_3
...
...
CREATE PROCEDURE proc_name
[@parameter_name [= 'Some Default Value'] ]
[@parameter_name [= 'Some Default Value'] ]
...
...
AS
sql-statement;
[sql-statement | sql-temp-table-usage] ;
[sql-statement | sql-temp-table-usage] ;
...
...
--Executing a PROCEDURE
EXEC proc_name [value1] [,value2] ... [,valueN]
--Deleting a PROCEDURE
DROP PROCEDURE proc_name
--==== Reading JSON from files or inline DATA ====

--Example: read a single file (SRC points to one file path).
SELECT * FROM $ WITH (SRC='zappysys-public-bucket/cust-1.json')

--Example: read many files at once using * and ? wildcards in SRC.
SELECT * FROM $
WITH (
SRC='zappysys-public-bucket/cust*-?.json'
--,RECURSIVE='True' --Include files from sub folder
)

--Example: query inline JSON supplied through the DATA option; the table name
--(rows) selects the "rows" array of the document.
SELECT * FROM rows
WITH
(DATA='
{
rows : [
{id:1, name: "AAA"},
{id:2, name: "BBB"},
{id:3, name: "CCC"}
]
}'
)

--Example: override column metadata (type/length) with the META option.
--Four equivalent ways to supply META are shown below (compact, json,
--file based, and named internal-storage entry).
SELECT * FROM rows
WITH
(DATA='
{
rows : [
{id:1, checknumber:"0001", name: "AAA"},
{id:2, checknumber:"0002", name: "BBB"},
{id:3, checknumber:"0003", name: "CCC"}
]
}'
--//Method-1: >>>DIRECT META (compact mode)<<<
,META='id:int;checknumber:string(10);name:string(10)'
-- Add: @OverrideMode:1 in the column list to override meta for only specific column(s) - In below example we are changing default datatype/length for id and checknumber, all other columns are dynamic so dont have to supply in META clause
--,META='@OverrideMode:1;id:int;checknumber:string(10)'
--//Method-2: >>>DIRECT META (json mode)<<<
--,META='[{Name:"id",Type:"Int32"},{Name:"checknumber",Type:"String",Length:10},{Name:"name",Type:"String",Length:10}]'
-- Add: @OverrideMode entry in the column list to override meta for only specific column(s). No need to set Type for that entry - In below example we are changing default datatype/length for id and checknumber, all other columns are dynamic so dont have to supply in META clause
--,META='[{Name:"@OverrideMode"}, {Name:"id",Type:"Int32"},{Name:"checknumber",Type:"String",Length:10}]'
--//Method-3: >>>META from File <<<
--,META='c:/data/meta.json'
--//Method-4: >>>META from Internal Storage - Just refer entry by name <<<
--,META='MY_META_STORAGE_NAME'
)

--Example: the @'...' literal form carries escape sequences such as \r\n and
--\uXXXX inside the DATA string (compare with the plain '...' literals above).
SELECT * FROM rows
WITH
(DATA=@'{rows : [{id:1, name: "AA\u0041"},\r\n {id:2, name: "BBB"},\r\n {id:3, name: "CCC"}]}'
)

--Example: stream a very large (gzip compressed) file; the --FAST suffix on
--Filter turns on STREAM mode and IncludeParentColumns must then be 'False'.
SELECT * FROM $
--LIMIT 10
WITH(
Filter='$.LargeArray[*]--FAST' --//Adding --FAST option turn on STREAM mode (large files)
,SRC='zappysys-public-bucket/large_file_10k_largearray_prop.json.gz'
--,SRC='zappysys-public-bucket/large_file*.json.gz'
,IncludeParentColumns='False' --//This Must be OFF for STREAM mode (read very large files)
,FileCompressionType='GZip' --Zip or None (Zip format only available for Local files)
)
--==== Built-in scalar function examples (SQLite-style functions) ====

--Example: string functions (substr, printf, || concatenation, upper/lower,
--replace, instr, trim variants, length).
SELECT
substr('ZappySys', 1, 5)
, printf('%s %s %.2f', 'v1', CompanyName, CompanyId)
, ('Str1' || CompanyName || 'Str2') as ConcatExample
, upper('ZappySys')
, lower('ZappySys')
, replace('ZappySys', 'Sys', 'XYZ')
, instr('ZappySys', 'Sys')
, trim(' Zappy Sys ') trim1
, trim('@@@ZappySys@@@', '@') trim2
, rtrim(' ZappySys ')
, ltrim(' ZappySys ')
, length( CompanyName )
FROM $ WITH (
FILTER='$.Info.Rows[*]',
DATA='{ Info: { Rows: [ { CompanyId: 1000 , CompanyName: "ZappySys" }, { CompanyId: 1001 , CompanyName: "Microsoft" } ] } }'
)

--Example: null-handling functions.
SELECT
COALESCE(10,20) coalesce1 --returns first non-null arg
,COALESCE(null,20) coalesce2 --returns first non-null arg
,COALESCE(null,20,30,40) coalesce3 --returns first non-null arg
,IFNULL(null,20) --returns first non-null arg. Same function as COALESCE but simple (just two arguments allowed)
,NULLIF(20,20) nullif1 --returns null if both argument same
,NULLIF(10,20) nullif2 --returns null if both argument same
FROM $ WITH (
FILTER='$.Info.Rows[*]',
DATA='{ Info: { Rows : [ { CompanyId: 1000 , CompanyName: "ZappySys" }, { CompanyId: 1001 , CompanyName: "Microsoft" }] } }')

--Example: date/time functions and date modifiers ('start of month', '+1 month', ...).
SELECT
DATE('now') date_now
,STRFTIME('%Y-%m-%dT%H:%M:%f','now') formatted_Date_time
,DATETIME('now') datetime_now
,DATE('now','localtime') datetime_now_local
,TIME('now') time_now
,JULIANDAY('now')
,DATE('now', 'start of month', '+1 month' , '-1 day' ) dt_modify1
,DATE('now', '+5 day') dt_modify2
FROM $ WITH (DATA='{ Info: { Rows : [ { CompanyId: 1000 , CompanyName: "ZappySys" }, { CompanyId: 1001 , CompanyName: "Microsoft" } ] } }')

--Example: math functions.
SELECT
abs(-500)
,random()
,round(1000.236, 2)
FROM $ WITH (DATA='{ Info: { Rows: [ { CompanyId: 1000 , CompanyName: "ZappySys" }, { CompanyId: 1001 , CompanyName: "Microsoft" } ] } }')

--NOTE(review): the statement below is an exact duplicate of the string
--function example above — possibly intentional repetition; confirm.
SELECT
substr('ZappySys', 1, 5)
, printf('%s %s %.2f', 'v1', CompanyName, CompanyId)
, ('Str1' || CompanyName || 'Str2') as ConcatExample
, upper('ZappySys')
, lower('ZappySys')
, replace('ZappySys', 'Sys', 'XYZ')
, instr('ZappySys', 'Sys')
, trim(' Zappy Sys ') trim1
, trim('@@@ZappySys@@@', '@') trim2
, rtrim(' ZappySys ')
, ltrim(' ZappySys ')
, length( CompanyName )
FROM $ WITH (
FILTER='$.Info.Rows[*]',
DATA='{ Info: { Rows: [ { CompanyId: 1000 , CompanyName: "ZappySys" }, { CompanyId: 1001 , CompanyName: "Microsoft" } ] } }'
)
--==== Regular expression function examples ====
--regex_search uses (?<name>...) named groups (.NET-style syntax).
SELECT
regex_ismatch(SSN, '\d{3}-\d{3}-\d{4}') IsValidSSN, --check pattern found
--NOTE(review): pattern above is 3-3-4 digits to match the sample data below;
--real SSNs are 3-2-4 (\d{3}-\d{2}-\d{4}) — confirm intent.
regex_replace(SSN, '\d','*') MaskedSSN, --search/replace pattern
regex_search(SSN, '\d{4}') SSNLast4Digits, --extract substring using pattern search
regex_search(Email, '(?<name>\w+)@(?<domain>\w+.com)',0, 2) DomainOf1stEmailByIndex, --extract 1st email (zero based index) and get domain name (by group name). For 2nd argument you can use numeric index or named argument. Both arguments are zero based index.
regex_search(Email, '(?<name>\w+)@(?<domain>\w+.com)',2, 2) DomainOf3rdEmailByIndex, --extract 3rd email (zero based index) and get 2nd part of match (i.e. domain name). Both arguments are zero based index.
regex_search('a@x.com | b@y.com | c@z.com', '(?<name>\w+)@(?<domain>\w+.com)','-0', 2) DomainOfLastEmail, --extract last email (use negative sign in quote - zero based index) and get 2nd part of match (i.e. domain name). Both arguments are zero based index.
regex_search(
'a@x.com | b@y.com | c@z.com'
, '(?<name>\w+)@(?<domain>\w+.com)'
, '*' --occurrence match index ( * , N or -N ) negative number will do reverse find (e.g. to get last match use -0 , to get second last use -1)
, 2 --group index from match (default is full match or -1)
--, 0 -- ignore case for pattern match (0=strict match, 1=ignore case, default is 0)
--, 0 -- throw error if not matched 0=don't throw error, 1=throw error
--, '#' -- separator for multi match
) GetAllDomains --extract domains from all emails and show as email (zero based index) and get 2nd part of match (i.e. domain name). Both arguments are zero based index.
FROM $ WITH (Filter='$.data.row[*]', DATA= '{
data: {
row:[{SSN: "111-222-4444", Email: "a@x.com | b@y.com | c@z.com" },{SSN: "111-X2Z-AB44", Email: "a@x.com | b@y.com | c@z.com"}]
}
}
')

--Example: <<format,FUN_TODAY>> placeholder in SRC is expanded to today's date
--in the given format before the path is used.
SELECT * FROM $ WITH(SRC='mybucket/myservice/myfile_<<yyyy-MM-dd,FUN_TODAY>>.dat') --read today's file

--Example: SELECT without FROM evaluates expressions and returns one row.
SELECT RTRIM('trimmed ') Col1,
DATE('now', '+5 day') Col2
--==== Input pre-processing, JSONPath filter, pivot and json_value examples ====

--Example: search/replace the raw input text before parsing (EnableCustomReplace).
--Here the empty number:{} objects are rewritten so each row gets a usable value.
SELECT * FROM rows
WITH
(DATA='
{
rows : [
{_id:1, _name: "AAA",number:{}},
{_id:2, _name: "BBB",number:{}},
{_id:3, _name: "CCC",number:{}}
]
}'
,EnableCustomReplace=1
,SearchFor='number:{}' --//or USE: --regex , --regex-ic (case-insensitive) or no suffix for plain text search.
,ReplaceWith='number:{blank:1}'
)

--Example: Filter (a JSONPath) selects which nodes become rows;
--IncludeParentColumns + ParentColumnPrefix bring in columns found above the
--filtered node (e.g. version, location).
SELECT * FROM $
WITH
(
--enter path in SRC or use static value in DATA'
--SRC='zappysys-public-bucket/api/data.json'
--SRC='zappysys-public-bucket/somefile.json'
--SRC='zappysys-public-bucket/some*.json'
DATA='
{ version:1,
doc: {
location: {lat:111.55,lon:-222.12},
rows: [
{id:1, name: "AAA"},
{id:2, name: "BBB"},
{id:3, name: "CCC"}
]
}
}'
,Filter='$.doc.rows[*]' -- for more information check this https://zappysys.com/blog/jsonpath-examples-expression-cheetsheet/
--NOTE(review): if the two equal-match filters below are uncommented, the
--inner quotes around AAA must be doubled ('') to stay inside the SQL string.
--,Filter='$.doc.rows[?(@name=='AAA')]' --//equal match example
--,Filter='$..[?(@name=='AAA')]' --//equal match example (scan all levels - parent or child)
--,Filter='$.doc.rows[?(@name=~/AAA|BBB/)]' --//regex match example
,IncludeParentColumns='true' --this will include any columns found above filtered node (e.g. rows).
,ParentColumnPrefix='p_'
)

--Example: EnablePivot turns property names into rows; Pivot_Name holds the
--property name (P101, ...) and Pivot_Value_<child> holds each child value.
SELECT
Pivot_Name as ProjectName,
Pivot_Value_id as Id,
Pivot_Value_code as Code,
Pivot_Value_budget as Budget
FROM $
WITH(
Filter='$.projects',
--enter path in SRC or use static value in DATA'
--SRC='zappysys-public-bucket/api/data.json'
--SRC='zappysys-public-bucket/some*.json'
--SRC='zappysys-public-bucket/somefile.json'
DATA='
{
projects : {
P101 : {id:1, code: "AAA", budget:1500.10},
P102 : {id:2, code: "BBB", budget:7000.20},
P103 : {id:3, code: "CCC", budget:1100.10}
}
}
',
EnablePivot='True'
)

--Example: json_value / json_array_* extract values from JSON stored inside a
--column ('' doubles a single quote inside the outer SQL string literal).
select
json_value(j1,"$.V1"),
json_array_first(j2),
json_array_last(j2),
json_array_nth(j2,1)
from array WITH (DATA=
'{"array" : [{"c1": "abc", "c2": "ABC", "j1": "{''V1'':''value1''}", "j2": "[13,22,''31'']"}] }')
--==== Array flattening examples ====

--Alternative (commented out): EnableArrayFlattening expands small arrays into
--columns automatically, up to MaxArrayItemsToFlatten items.
/*
SELECT * FROM value
WITH(SRC='zappysys-public-bucket/people.json'
, EnableArrayFlattening = 'True' --This is another way to extract data from small arrays. It will flatten arrays upto N items
, MaxArrayItemsToFlatten = 5
)
*/
-- OR --
--Example: flatten array elements manually with json_value and $.[index] paths.
SELECT
UserName,
Features as Features_Raw,
json_value(Features,'$.[0]') || ',' || json_value(Features,'$.[1]') Features_CSV,
AddressInfo as AddressInfo_Raw,
json_value(AddressInfo,'$.[0].Address') Address1_Address,
json_value(AddressInfo,'$.[0].City.Name') Address1_City,
json_value(AddressInfo,'$.[0].City.CountryRegion') Address1_Country,
json_value(AddressInfo,'$.[0].City.Region') Address1_Region,
json_value(AddressInfo,'$.[1].Address') Address2_Address,
json_value(AddressInfo,'$.[1].City.Name') Address2_City,
json_value(AddressInfo,'$.[1].City.CountryRegion') Address2_Country,
json_value(AddressInfo,'$.[1].City.Region') Address2_Region,
Cost
FROM value
WITH(SRC='zappysys-public-bucket/people.json'
)
--==== 2-dimensional array transforms and temp storage ====

--Example: simple 2D array where column names come from a separate JSONPath
--(ArrayTransColumnNameFilter reads the "columns" array).
SELECT * FROM rows
WITH(
--enter path in SRC or use static value in DATA'
--SRC='zappysys-public-bucket/api/data.json'
--SRC='zappysys-public-bucket/some*.json'
--SRC='zappysys-public-bucket/somefile.json'
DATA='
{
"columns": ["RecordID","CustomerID","CustomerName"],
"rows": [ [1,"AAA","Customer A"], [2,"BBB","Customer B"], [3,"CCC","Customer C"] ]
}'
,ArrayTransformType='TransformSimpleTwoDimensionalArray'
,ArrayTransColumnNameFilter='$.columns[*]'
)

--Example: simple 2D array with user-supplied column names
--(ArrayTransEnableCustomColumns / ArrayTransCustomColumns).
SELECT * FROM rows
WITH(
--enter path in SRC or use static value in DATA'
--SRC='zappysys-public-bucket/api/data.json'
--SRC='zappysys-public-bucket/some*.json'
--SRC='zappysys-public-bucket/somefile.json'
DATA='
{
"rows": [ [1,"AAA","Customer A"], [2,"BBB","Customer B"], [3,"CCC","Customer C"] ]
}'
,ArrayTransformType='TransformSimpleTwoDimensionalArray'
,ArrayTransEnableCustomColumns='True'
,ArrayTransCustomColumns='RecordID,CustomerID,CustomerName'
)

--Example: complex 2D array where each row wraps its values in f/v objects;
--column names and row values are each extracted with their own JSONPath.
SELECT * FROM rows
WITH(
--enter path in SRC or use static value in DATA'
--SRC='zappysys-public-bucket/api/data.json'
--SRC='zappysys-public-bucket/some*.json'
--SRC='zappysys-public-bucket/somefile.json'
DATA='
{
requestid:1,
schema:{ fields:[{name:"id"},{name:"name"},{name:"birthdate"}] },
rows: [ { f:[{v:10},{v:"aaa"},{v:"2015-01-01"}]}, { f:[{v:20},{v:"bbb"},{v:"2015-02-01"}] } ]
}'
,ArrayTransformType = 'TransformComplexTwoDimensionalArray'
,ArrayTransColumnNameFilter = '$.schema.fields[*].name'
,ArrayTransRowValueFilter = '$.f[*].v'
)

--Example: choose where intermediate results are buffered (Memory or Disk).
SELECT * FROM $
WITH(
Src='zappysys-public-bucket/cust-1.json'
,TempStorageMode='Disk' --or 'Memory'
--OR
--,TempStorageMode=1 --//Memory=0, Disk=1
)
--==== Filtering, grouping and ordering examples ====

--Example: compare a date column by wrapping it in DATETIME(); note the WITH
--clause comes after ORDER BY in this dialect.
SELECT * FROM value
WHERE OrderDate<=DATETIME('1997-12-31 00:00:00') -- OR use DATE('1997-12-31') DateTime column can be queried this way. You must wrap DATETIME function around it. Also must use 'yyyy-MM-dd' or 'yyyy-MM-dd HH:mm:ss' or 'yyyy-MM-dd HH:mm:ss.fff' format (where fff is milliseconds)
ORDER BY DATETIME(OrderDate)
WITH (SRC='zappysys-public-bucket/invoices.json')

--Example: aggregate with GROUP BY / HAVING, sort descending, keep top 3 rows.
SELECT
Country AS Invoice_Country
, SUM(UnitPrice * Quantity) AS Invoice_Total
FROM value
WHERE Discount > 0
--AND OrderDate<=DATETIME('1997-12-31 00:00:00') -- OR use DATE('1997-12-31') DateTime column can be queried this way. You must wrap DATETIME function around it. Also must use 'yyyy-MM-dd' or 'yyyy-MM-dd HH:mm:ss' or 'yyyy-MM-dd HH:mm:ss.fff' format (where fff is milliseconds)
GROUP BY Country
HAVING SUM(UnitPrice * Quantity) > 1000
ORDER BY Invoice_Total DESC--,DATETIME(OrderDate)
LIMIT 3
WITH (SRC='zappysys-public-bucket/invoices.json')
--==== CASE expression, temp tables, joins and UNION ====

--Example: simple CASE — compares id against each WHEN value.
SELECT
name
, CASE id
WHEN 1 THEN 1+1
WHEN 2 THEN 2+2
ELSE 0
END ThisIsCaseColumn
FROM rows
WITH
(DATA='
{
rows : [
{id:1, name: "AAA"},
{id:2, name: "BBB"},
{id:3, name: "CCC"}
]
}'
)

--Example: SELECT ... INTO #temp stores the result in a temp table that later
--statements in the same batch (separated by ;) can query.
SELECT * INTO #tmp FROM rows
WITH
(
--enter path in SRC or use static value in DATA
--SRC='zappysys-public-bucket/api/data.json'
--SRC='zappysys-public-bucket/some*.json'
--SRC='zappysys-public-bucket/somefile.json'
--OR--
DATA='
{
rows : [
{id:1, name: "AAA"},
{id:2, name: "BBB"},
{id:3, name: "CCC"}
]
}'
);
--//second statement in the same batch
select * from #tmp
where id > 1
--AND SomeDateColumn=DATETIME('2012-12-31 00:00:00') --DateTime column can be queried this way... Must use 'yyyy-MM-dd HH:mm:ss' format
;

--Example: load two temp tables, then JOIN them in a later statement.
SELECT * INTO #cust FROM rows
WITH (
--enter path in SRC or use static value in DATA
--SRC='zappysys-public-bucket/api/customers.json'
--SRC='zappysys-public-bucket/cust*.json'
--SRC='zappysys-public-bucket/customers.json'
DATA='{
rows: [
{customerid: 1, name: "AAA"},
{customerid: 1, name: "BBB"},
{customerid: 1, name: "CCC"}
]
}
'
);
SELECT * INTO #ord FROM rows
WITH (
DATA='{
rows: [
{orderid:1000, customerid: 1, orderdate: "2012-01-01T00:00:00",total:2000.50},
{orderid:1001, customerid: 1, orderdate: "2012-01-01T00:00:00",total:2000.50},
{orderid:1002, customerid: 2, orderdate: "2013-01-01T00:00:00",total:5000.70},
{orderid:1003, customerid: 2, orderdate: "2013-01-02T00:00:00",total:6000.70}
]
}'
);
--//second statement in the same batch
SELECT c.customerid,o.orderid,o.orderdate,o.total
FROM #cust c
INNER JOIN #ord o ON c.customerid=o.customerid
--LEFT OUTER JOIN #ord o ON c.customerid=o.customerid
--WHERE o.orderdate=DATETIME('2012-01-01 00:00:00') --for datetime column format must be queried in 'yyyy-MM-dd HH:mm:ss' format only
-- To Use greater than less than must need to use DATETIME function (e.g. WHERE o.OrderDate<=DATETIME('1997-12-31 23:12:59')
;

--Example: combine two temp tables with UNION ALL (keeps duplicate rows).
SELECT * into #tbl1 FROM rows
WITH (
--enter path in SRC or use static value in DATA
--SRC='zappysys-public-bucket/api/data.json'
--SRC='zappysys-public-bucket/some*.json'
--SRC='zappysys-public-bucket/somefile.json'
DATA='{ rows : [{id:1, name: "AAA"}, {id:2, name: "BBB"}]}'
);
SELECT * into #tbl2 FROM rows
WITH (DATA='{ rows : [{id:3, name: "CCC"}, {id:4, name: "DDD"}]}' );
select * from #tbl1
UNION ALL
select * from #tbl2;
--==== Basic query syntax notes ====
--Whenever possible write a simple query: when you use WHERE / GROUP BY / ORDER BY expressions the driver may fall back to client-side processing if the server side doesn't support that construct.
--For comment use line comment with two dash (e.g. --my comment) or use block comment for multi line comment (e.g. /*my comment*/ )
--Any query must contain at least two required parts:
--data source (or table) names and a list of column names
--column names come first (preceded by SELECT keyword)
--table names follow (preceded by FROM keyword)
--Here is the query that returns all rows with all columns (* stands for all columns) from 'Accounts' table
SELECT * FROM Accounts
--Use quoted identifiers when you have space or other character in names (e.g. Column or Table name contains space); both "..." and [...] forms are accepted
SELECT ID, "Account Name" FROM "Closed Accounts"
SELECT ID, [Account Name] FROM [Closed Accounts]
--returns all rows from 'Accounts' table, each row will have Id and Amount columns
SELECT Id, Amount FROM Accounts
--returns only those rows of 'Accounts' table that meet next condition: Amount is greater or equal to 1000.0
SELECT Id, Amount FROM Accounts WHERE Amount >= 1000.0
--returns only those rows of 'Accounts' table that meet next condition: Amount is greater than 1000 or DepartamentID is equal to 123
SELECT Id, Amount FROM Accounts WHERE (Amount > 1000.0 OR DepartamentID = 123)
--returns only those rows of 'Accounts' table that meet next condition: DepartamentID is equal to 1, 101 or 202
SELECT Id, Amount FROM Accounts WHERE DepartamentID IN (1, 101, 202)
--returns only those rows of 'Accounts' table that meet next condition: DepartamentID is equal to 321 and Amount is less than 100
SELECT Id, Amount FROM Accounts WHERE DepartamentID = 321 AND Amount < 100
--returns only those rows of 'Persons' table that meet next condition: Name column value starts with 'John'
SELECT * FROM Person WHERE Name LIKE 'John%'
--case statement (simple CASE: Country is compared against each WHEN value)
SELECT
FirstName,
LastName,
CASE Country
WHEN 'US' THEN 'NA'
WHEN 'Canada' THEN 'NA'
WHEN 'India' THEN 'Asia'
WHEN 'China' THEN 'Asia'
ELSE 'Unknown'
END ColRegion
FROM Person
--escaping string values (use of \n \r \t \v allowed; the @ prefix on the literal enables escape-sequence processing — see the @'...' DATA example earlier in this file which uses \r\n)
SELECT * FROM MyTable WITH( Body=@'Line1\nLine2')