Skip to content

Commit

Permalink
Initial working version of Lux
Browse files Browse the repository at this point in the history
  • Loading branch information
mratsim committed Jul 4, 2019
1 parent 3720d52 commit a88a858
Show file tree
Hide file tree
Showing 8 changed files with 428 additions and 31 deletions.
62 changes: 62 additions & 0 deletions laser/lux/ast.nim
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# Laser
# Copyright (c) 2018 Mamy André-Ratsimbazafy
# Distributed under the Apache v2 License (license terms are at http://www.apache.org/licenses/LICENSE-2.0).
# This file may not be copied, modified, or distributed except according to those terms.

import
./ast/ast_compiler,
./ast/ast_definition

# ###########################
#
# Tests
#
# ###########################

# Self-test, only compiled when this file is the main module.
# It defines a Lux AST routine on LuxNode, asks `generate` to produce a
# seq[float32] implementation with the requested signature, then runs it
# on small inputs and prints the three outputs.
when isMainModule:

# AST routine: combines the three LuxNode inputs with the operators
# available on LuxNode and returns three LuxNode outputs as a named tuple.
proc foobar(a: LuxNode, b, c: LuxNode): tuple[bar: LuxNode, baz, buzz: LuxNode] =

let foo = a + b + c

# Don't use in-place updates
# https://github.com/nim-lang/Nim/issues/11637
let bar = foo * 2

var baz = foo * 3
var buzz = baz

buzz += a * 1000
baz += b
buzz += b

result.bar = bar
result.baz = baz
result.buzz = buzz

# Same name and arity but on int: present so that `generate` has to pick
# the LuxNode overload among several `foobar` candidates.
proc foobar(a: int, b, c: int): tuple[bar, baz, buzz: int] =
echo "Overloaded proc to test bindings"
discard

# Request a concrete `foobar` working on seq[float32]; only the signature
# is given here, the body is produced by the `generate` macro.
generate foobar:
proc foobar(a: seq[float32], b, c: seq[float32]): tuple[bar: seq[float32], baz, buzz: seq[float32]]

# Note: aligned stores require 16-byte alignment for SSE and 32-byte alignment for AVX.
# Unfortunately a normal seq offers no way to specify that (pending destructors).
# As a hack, we use the unaligned SIMD load and store with a required alignment of 4;
# in practice we would define our own tensor type
# with an aligned allocator.

import sequtils

# Three input sequences of 10 elements each, filled with 1, 2 and 3.
let
len = 10
u = newSeqWith(len, 1'f32)
v = newSeqWith(len, 2'f32)
w = newSeqWith(len, 3'f32)

let (pim, pam, poum) = foobar(u, v, w)

# The trailing numbers are the per-element values the author expected.
# NOTE(review): with `a * 1000` above and inputs 1, 2, 3, plain arithmetic
# gives 18 + 1000 + 2 = 1020 for `poum`, not 10020 - confirm whether the
# factor or the expected value below is stale.
echo pim # 12
echo pam # 20
echo poum # 10020
4 changes: 2 additions & 2 deletions laser/lux/ast/ast_codegen.nim
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ import
macros, tables,
# Internal
./ast_definition,
../platforms/[platform_common]
../platforms

proc codegen*(
ast: LuxNode,
Expand Down Expand Up @@ -125,7 +125,7 @@ proc codegen*(
visited[ast] = memloc
return memloc

proc bodyGen(
proc bodyGen*(
genSimd: bool, arch: SimdArch,
io: varargs[LuxNode],
ids: seq[NimNode],
Expand Down
11 changes: 9 additions & 2 deletions laser/lux/ast/ast_codegen_transfo.nim
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,14 @@
# Distributed under the Apache v2 License (license terms are at http://www.apache.org/licenses/LICENSE-2.0).
# This file may not be copied, modified, or distributed except according to those terms.

proc vectorize(
import
# Standard library
macros,
# Internals
../platforms,
../../private/align_unroller

proc vectorize*(
funcName: NimNode,
ptrs, simds: tuple[inParams, outParams: seq[NimNode]],
len: NimNode,
Expand Down Expand Up @@ -177,7 +184,7 @@ proc vectorize(
block: # Aligned part
let idx = newIdentNode("idx_")
result.add quote do:
let `unroll_stop` = round_down_power_of_2(
let `unroll_stop` = round_step_down(
`len` - `idxPeeling`, `unroll_factor`)

let (fcall, dst, dst_init, dst_assign) = elems(idx, simd = true)
Expand Down
267 changes: 267 additions & 0 deletions laser/lux/ast/ast_compiler.nim
Original file line number Diff line number Diff line change
@@ -0,0 +1,267 @@
# Laser
# Copyright (c) 2018 Mamy André-Ratsimbazafy
# Distributed under the Apache v2 License (license terms are at http://www.apache.org/licenses/LICENSE-2.0).
# This file may not be copied, modified, or distributed except according to those terms.

import
# Standard library
macros,
# Internal
../platforms,
./ast_definition,
./ast_sigmatch,
./ast_codegen,
./ast_codegen_transfo,
./macro_utils

# TODO: Do we need both compile and generate?

proc initParams(
       procDef,
       resultType: NimNode
     ): tuple[
       ids: seq[NimNode],
       ptrs, simds: tuple[inParams, outParams: seq[NimNode]],
       length: NimNode,
       initStmt: NimNode
     ] =
  ## Collects the identifiers of a routine definition and generates the
  ## statements that prepare its inputs and outputs for the vectorized loop.
  ##
  ## `procDef` is a statement list whose first child is the routine
  ## definition; `resultType` is its return type node.
  ##
  ## Returns:
  ## - `ids`: input identifiers followed by result identifiers,
  ##   in declaration order
  ## - `ptrs`: `<name>_raw_ptr` identifiers for inputs and outputs
  ## - `simds`: `<name>_simd` identifiers for inputs and outputs
  ## - `length`: the expression `<first input>.len`
  ## - `initStmt`: length assertions, raw pointer bindings and, for a tuple
  ##   result, the allocation of each `result.<field>`
  ##
  ## Only empty and tuple result types produce output entries; any other
  ## result type is left untouched.
  result.initStmt = newStmtList()
  let formalParams = procDef[0][3]

  # Element type, expressed as `type(firstInput[0])`
  let type0 = newCall(
    newIdentNode"type",
    nnkBracketExpr.newTree(
      formalParams[1][0],
      newLit 0
    )
  )

  # Inputs first, in the same order as the proc definition.
  # Index 0 of the formal params is the return type, hence we start at 1.
  for i in 1 ..< formalParams.len:
    let iddefs = formalParams[i]
    # The last 2 children of an IdentDefs are the type and the default value
    for j in 0 ..< iddefs.len - 2:
      let ident = iddefs[j]
      result.ids.add ident
      let raw_ptr = newIdentNode($ident & "_raw_ptr")
      result.ptrs.inParams.add raw_ptr

      if result.length.isNil:
        # The very first input defines the reference length.
        # This must not be reset at the start of each IdentDefs group,
        # otherwise `a: T, b, c: T` would never compare `a.len` to the rest.
        result.length = quote do: `ident`.len
      else:
        let len0 = result.length
        result.initStmt.add quote do:
          assert `len0` == `ident`.len
      result.initStmt.add quote do:
        let `raw_ptr` = cast[ptr UncheckedArray[`type0`]](`ident`[0].unsafeAddr)
      result.simds.inParams.add newIdentNode($ident & "_simd")

  # Now add the result idents
  # We work at simd vector level
  let len0 = result.length

  if resultType.kind == nnkEmpty:
    discard
  elif resultType.kind == nnkTupleTy:
    for i in 0 ..< resultType.len:
      let iddefs = resultType[i]
      for j in 0 ..< iddefs.len - 2:
        let ident = iddefs[j]
        result.ids.add ident
        let raw_ptr = newIdentNode($ident & "_raw_ptr")
        result.ptrs.outParams.add raw_ptr

        # Each tuple field is allocated with the input length
        # before its address is taken.
        let res = nnkDotExpr.newTree(
          newIdentNode"result",
          iddefs[j]
        )
        result.initStmt.add quote do:
          `res` = newSeq[`type0`](`len0`)
          let `raw_ptr` = cast[ptr UncheckedArray[`type0`]](`res`[0].unsafeAddr)

        result.simds.outParams.add newIdentNode($ident & "_simd")

macro compile(arch: static SimdArch, io: static varargs[LuxNode], procDef: untyped): untyped =
  ## Fills in the body of the single routine declared in `procDef` from the
  ## Lux AST nodes in `io`.
  ##
  ## The generated body holds, in order: the statements returned by
  ## `initParams`, an inner proc generated for `ArchGeneric` on `float32`,
  ## an inner proc generated for `arch` on its SIMD type, and the code
  ## returned by `vectorize`.
  ##
  ## Note: io must be an array - https://github.com/nim-lang/Nim/issues/10691

  # Shape of `procDef` for
  #
  #   compile([a, b, c, bar, baz, buzz]):
  #     proc foobar[T](a, b, c: T): tuple[bar, baz, buzz: T]
  #
  # StmtList
  #   ProcDef
  #     Ident "foobar"
  #     Empty
  #     GenericParams
  #       IdentDefs
  #         Ident "T"
  #         Empty
  #         Empty
  #     FormalParams
  #       TupleTy
  #         IdentDefs
  #           Ident "bar"
  #           Ident "baz"
  #           Ident "buzz"
  #           Ident "T"
  #           Empty
  #       IdentDefs
  #         Ident "a"
  #         Ident "b"
  #         Ident "c"
  #         Ident "T"
  #         Empty
  #     Empty
  #     Empty
  #     Empty

  # echo procDef.treerepr

  # Sanity checks: exactly one bodiless proc/func definition is expected
  procDef.expectkind(nnkStmtList)
  assert procDef.len == 1, "Only 1 statement is allowed, the function definition"
  let routine = procDef[0]
  routine.expectkind({nnkProcDef, nnkFuncDef})
  # TODO: check that the function inputs are in a symbol table?
  routine[6].expectKind(nnkEmpty)

  let retType = routine[3][0]
  let (ids, ptrs, simds, length, initStmts) = initParams(procDef, retType)

  # echo initStmts.toStrLit()

  # The container type that gets swapped out in the inner procs
  let seqFloat32 = nnkBracketExpr.newTree(
    newIdentNode"seq", newIdentNode"float32"
  )

  # Inner SIMD proc, specialized to a SIMD architecture.
  # Inside it the original idents `ids` are shadowed.
  let simdBody = bodyGen(
    genSimd = true,
    arch = arch,
    io = io,
    ids = ids,
    resultType = retType
  )
  let simdProc = routine.replaceType(seqFloat32, SimdTable[arch][simdType])
  simdProc[6] = simdBody # Slot 6 is the proc body
  echo simdProc.toStrLit

  # Inner generic proc, working on plain float32
  let genericBody = bodyGen(
    genSimd = false,
    arch = ArchGeneric,
    io = io,
    ids = ids,
    resultType = retType
  )
  let genericProc = routine.replaceType(seqFloat32, newIdentNode"float32")
  genericProc[6] = genericBody # Slot 6 is the proc body
  echo genericProc.toStrLit

  # Vectorize the inner proc so that it applies to a contiguous array.
  # We require 4 alignment as a hack to keep seq[T]
  # and use unaligned load/store in code.
  # Only the last argument differs between SSE and the other architectures.
  let vecBody =
    if arch == x86_SSE:
      vectorize(routine[0], ptrs, simds, length, arch, 4, 4)
    else:
      vectorize(routine[0], ptrs, simds, length, arch, 4, 8)

  # Output: a copy of the declaration with the assembled body
  result = procDef.copyNimTree()
  result[0][6] = newStmtList(initStmts, genericProc, simdProc, vecBody)

  # echo result.toStrLit

macro generate*(ast_routine: typed, signature: untyped): untyped =
  ## Generates the implementation of the routine declared in `signature`
  ## from the Lux AST routine `ast_routine`.
  ##
  ## `ast_routine` may be overloaded: the overload to use is selected by
  ## `resolveASToverload` from the formal parameters of `signature`.
  ## All of its parameters must be of type `LuxNode` without default value.
  ##
  ## The emitted code:
  ## 1. binds one compile-time `let` per input to `input(i)`
  ## 2. calls the AST routine on those inputs and, unless it returns
  ##    nothing, binds the result to a compile-time `let`
  ## 3. invokes `compile(x86_SSE, io, signature)` where `io` lists
  ##    the inputs followed by the outputs
  let formalParams = signature[0][3]
  let ast = ast_routine.resolveASToverload(formalParams)

  # Get the routine signature
  let sig = ast.getImpl[3]
  sig.expectKind(nnkFormalParams)

  # Get all inputs: one fresh symbol per parameter of the AST routine
  var inputs: seq[NimNode]
  for idx_identdef in 1 ..< sig.len:
    let identdef = sig[idx_identdef]
    doAssert identdef[^2].eqIdent"LuxNode"
    identdef[^1].expectKind(nnkEmpty)
    for idx_ident in 0 .. identdef.len-3:
      inputs.add genSym(nskLet, $identdef[idx_ident] & "_")

  # Allocate inputs
  result = newStmtList()
  proc ct(node: NimNode): NimNode =
    ## Wraps `node` as `node {.compileTime.}`
    nnkPragmaExpr.newTree(
      node,
      nnkPragma.newTree(
        ident"compileTime"
      )
    )

  for i, in_ident in inputs:
    result.add newLetStmt(
      ct(in_ident),
      newCall("input", newLit i)
    )

  # Call the AST routine
  let call = newCall(ast, inputs)
  var callAssign: NimNode
  case sig[0].kind
  of nnkEmpty: # Case 1: no result
    result.add call
  # Compile-time tuple destructuring is bugged - https://github.com/nim-lang/Nim/issues/11634
  # of nnkTupleTy: # Case 2: tuple result
  #   callAssign = nnkVarTuple.newTree()
  #   for identdef in sig[0]:
  #     doAssert identdef[^2].eqIdent"LuxNode"
  #     identdef[^1].expectKind(nnkEmpty)
  #     for idx_ident in 0 .. identdef.len-3:
  #       callAssign.add ct(identdef[idx_ident])
  #   callAssign.add newEmptyNode()
  #   callAssign.add call
  #   result.add nnkLetSection.newTree(
  #     callAssign
  #   )
  else: # Case 3: single return value (also used for tuples, see above)
    callAssign = ct(genSym(nskLet, "callResult_"))
    result.add newLetStmt(
      callAssign, call
    )

  # Collect all the input/output idents.
  # `callAssign` is `sym {.compileTime.}`: only its first child, the bare
  # symbol, may be used as an expression.
  var io = inputs
  case sig[0].kind
  of nnkEmpty:
    discard
  of nnkTupleTy:
    # One `callResult[idx]` access per tuple field
    var idx = 0
    for identdef in sig[0]:
      for idx_ident in 0 .. identdef.len-3:
        io.add nnkBracketExpr.newTree(
          callAssign[0],
          newLit idx
        )
        inc idx
  else:
    # Pass the symbol, not the whole pragma expression
    io.add callAssign[0]

  result.add quote do:
    compile(x86_SSE, `io`, `signature`)

  echo result.toStrlit
Loading

0 comments on commit a88a858

Please sign in to comment.