Skip to content

Commit 83b4c05

Browse files
Merge pull request swiftlang#8692 from aschwaighofer/wip_support_for_const_string
Add SIL and IRGen support for a ConstantStringLiteral instruction
2 parents 3623d40 + b167b44 commit 83b4c05

22 files changed

+463
-10
lines changed

include/swift/SIL/SILBuilder.h

+15
Original file line numberDiff line numberDiff line change
@@ -461,6 +461,21 @@ class SILBuilder {
461461
getSILDebugLocation(Loc), text.toStringRef(Out), encoding, F));
462462
}
463463

464+
/// Build a ConstStringLiteralInst for \p text with the requested \p encoding
/// at the debug location derived from \p Loc, and pass it to insert().
ConstStringLiteralInst *
createConstStringLiteral(SILLocation Loc, StringRef text,
                         ConstStringLiteralInst::Encoding encoding) {
  auto *Literal = ConstStringLiteralInst::create(getSILDebugLocation(Loc),
                                                 text, encoding, F);
  return insert(Literal);
}
470+
471+
/// Twine flavour of createConstStringLiteral: the twine is first rendered
/// into a StringRef (using a local scratch buffer where needed) and the
/// instruction is then created from that text.
ConstStringLiteralInst *
createConstStringLiteral(SILLocation Loc, const Twine &text,
                         ConstStringLiteralInst::Encoding encoding) {
  SmallVector<char, 256> Scratch;
  StringRef Flattened = text.toStringRef(Scratch);
  auto *Literal = ConstStringLiteralInst::create(getSILDebugLocation(Loc),
                                                 Flattened, encoding, F);
  return insert(Literal);
}
478+
464479
LoadInst *createLoad(SILLocation Loc, SILValue LV,
465480
LoadOwnershipQualifier Qualifier) {
466481
assert((Qualifier != LoadOwnershipQualifier::Unqualified) ||

include/swift/SIL/SILCloner.h

+12-3
Original file line numberDiff line numberDiff line change
@@ -665,9 +665,18 @@ SILCloner<ImplClass>::visitStringLiteralInst(StringLiteralInst *Inst) {
665665
Inst->getEncoding()));
666666
}
667667

668-
template<typename ImplClass>
669-
void
670-
SILCloner<ImplClass>::visitLoadInst(LoadInst *Inst) {
668+
template <typename ImplClass>
669+
void SILCloner<ImplClass>::visitConstStringLiteralInst(
670+
ConstStringLiteralInst *Inst) {
671+
getBuilder().setCurrentDebugScope(getOpScope(Inst->getDebugScope()));
672+
doPostProcess(Inst,
673+
getBuilder().createConstStringLiteral(
674+
getOpLocation(Inst->getLoc()), Inst->getValue(),
675+
Inst->getEncoding()));
676+
}
677+
678+
template <typename ImplClass>
679+
void SILCloner<ImplClass>::visitLoadInst(LoadInst *Inst) {
671680
getBuilder().setCurrentDebugScope(getOpScope(Inst->getDebugScope()));
672681
doPostProcess(Inst, getBuilder().createLoad(getOpLocation(Inst->getLoc()),
673682
getOpValue(Inst->getOperand()),

include/swift/SIL/SILInstruction.h

+45
Original file line numberDiff line numberDiff line change
@@ -1655,6 +1655,51 @@ class StringLiteralInst final : public LiteralInst,
16551655
}
16561656
};
16571657

1658+
/// ConstStringLiteralInst - Encapsulates a string constant, as defined
1659+
/// originally by
1660+
/// a StringLiteralExpr. This produces the address of the string data as a
1661+
/// Builtin.RawPointer.
1662+
class ConstStringLiteralInst final
1663+
: public LiteralInst,
1664+
private llvm::TrailingObjects<ConstStringLiteralInst, char> {
1665+
friend TrailingObjects;
1666+
friend SILBuilder;
1667+
1668+
public:
1669+
enum class Encoding {
1670+
UTF8,
1671+
UTF16,
1672+
};
1673+
1674+
private:
1675+
unsigned Length;
1676+
Encoding TheEncoding;
1677+
1678+
ConstStringLiteralInst(SILDebugLocation DebugLoc, StringRef text,
1679+
Encoding encoding, SILType ty);
1680+
1681+
static ConstStringLiteralInst *create(SILDebugLocation DebugLoc,
1682+
StringRef Text, Encoding encoding,
1683+
SILFunction &F);
1684+
1685+
public:
1686+
/// getValue - Return the string data for the literal, in UTF-8.
1687+
StringRef getValue() const { return {getTrailingObjects<char>(), Length}; }
1688+
1689+
/// getEncoding - Return the desired encoding of the text.
1690+
Encoding getEncoding() const { return TheEncoding; }
1691+
1692+
/// getCodeUnitCount - Return encoding-based length of the string
1693+
/// literal in code units.
1694+
uint64_t getCodeUnitCount();
1695+
1696+
ArrayRef<Operand> getAllOperands() const { return {}; }
1697+
MutableArrayRef<Operand> getAllOperands() { return {}; }
1698+
1699+
static bool classof(const ValueBase *V) {
1700+
return V->getKind() == ValueKind::ConstStringLiteralInst;
1701+
}
1702+
};
16581703
//===----------------------------------------------------------------------===//
16591704
// Memory instructions.
16601705
//===----------------------------------------------------------------------===//

include/swift/SIL/SILNodes.def

+2-1
Original file line numberDiff line numberDiff line change
@@ -185,7 +185,8 @@ ABSTRACT_VALUE(SILInstruction, ValueBase)
185185
INST(IntegerLiteralInst, LiteralInst, integer_literal, None, DoesNotRelease)
186186
INST(FloatLiteralInst, LiteralInst, float_literal, None, DoesNotRelease)
187187
INST(StringLiteralInst, LiteralInst, string_literal, None, DoesNotRelease)
188-
VALUE_RANGE(LiteralInst, FunctionRefInst, StringLiteralInst)
188+
INST(ConstStringLiteralInst, LiteralInst, const_string_literal, None, DoesNotRelease)
189+
VALUE_RANGE(LiteralInst, FunctionRefInst, ConstStringLiteralInst)
189190

190191
// Dynamic Dispatch
191192
ABSTRACT_VALUE(MethodInst, SILInstruction)

include/swift/Serialization/ModuleFormat.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ const uint16_t VERSION_MAJOR = 0;
5454
/// in source control, you should also update the comment to briefly
5555
/// describe what change you made. The content of this comment isn't important;
5656
/// it just ensures a conflict if two people change the module format.
57-
const uint16_t VERSION_MINOR = 331; // Last change: type witness substitutions
57+
const uint16_t VERSION_MINOR = 332; // Last change: constant_string_literal
5858

5959
using DeclID = PointerEmbeddedInt<unsigned, 31>;
6060
using DeclIDField = BCFixed<31>;

lib/IRGen/GenDecl.cpp

+165
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,9 @@
2727
#include "swift/AST/TypeMemberVisitor.h"
2828
#include "swift/AST/Types.h"
2929
#include "swift/ClangImporter/ClangModule.h"
30+
#include "swift/Demangling/ManglingMacros.h"
3031
#include "swift/IRGen/Linking.h"
32+
#include "swift/Runtime/HeapObject.h"
3133
#include "swift/SIL/FormalLinkage.h"
3234
#include "swift/SIL/SILDebugScope.h"
3335
#include "swift/SIL/SILModule.h"
@@ -3079,6 +3081,169 @@ llvm::Constant *IRGenModule::getAddrOfGlobalUTF16String(StringRef utf8) {
30793081
return address;
30803082
}
30813083

3084+
/// Return a reference to the global named \p name, typed as
/// IGM.ObjCClassStructTy. A global of that name already present in the module
/// is reused (cast to the expected type); otherwise an external, non-constant,
/// pointer-aligned declaration without initializer is added to the module.
static llvm::Constant *getMetatypeDeclarationFor(IRGenModule &IGM,
                                                 StringRef name) {
  auto *classStructTy = IGM.ObjCClassStructTy;

  // Reuse the variable if the module already has it.
  if (auto *known = IGM.Module.getNamedGlobal(name))
    return getElementBitCast(known, classStructTy);

  auto *decl = new llvm::GlobalVariable(
      IGM.Module, classStructTy, /*constant*/ false,
      llvm::GlobalValue::ExternalLinkage, /*initializer*/ nullptr, name);
  decl->setVisibility(llvm::GlobalValue::DefaultVisibility);
  decl->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
  decl->setAlignment(IGM.getPointerAlignment().getValue());
  return decl;
}
3105+
// Two-step stringification: REALLY_STRINGIFY forwards its argument through
// STRINGIFY_IMPL so that an argument which is itself a macro gets expanded
// before '#' turns it into a string literal.
#define STRINGIFY_IMPL(x) #x
3106+
#define REALLY_STRINGIFY( x) STRINGIFY_IMPL(x)
3107+
3108+
/// Return the address of a private, constant global that holds \p utf8 laid
/// out as a heap object header followed by count, capacity, flags and the
/// null-terminated character data. The global is built on first request and
/// served from GlobalConstantStrings afterwards.
llvm::Constant *
IRGenModule::getAddrOfGlobalConstantString(StringRef utf8) {
  // Hand back the global emitted earlier for this text, if there is one.
  auto &cacheSlot = GlobalConstantStrings[utf8];
  if (cacheSlot)
    return cacheSlot;

  // Otherwise build it. getString() implicitly adds a trailing null.
  auto *chars = llvm::ConstantDataArray::getString(LLVMContext, utf8);

  // Layout: { heap object header, count, capacity, flags, characters }.
  llvm::Type *layoutElts[] = {
    RefCountedStructTy, Int32Ty, Int32Ty, Int8Ty, chars->getType()
  };
  auto *layoutTy = llvm::StructType::get(getLLVMContext(), layoutElts,
                                         /*packed*/ false);

  // The first header field refers to the storage class's metadata symbol.
  llvm::Constant *metadata = getMetatypeDeclarationFor(
      *this, REALLY_STRINGIFY(CLASS_METADATA_SYM(s20_Latin1StringStorage)));
  metadata = llvm::ConstantExpr::getBitCast(metadata, TypeMetadataPtrTy);

  // Header: metadata pointer, the bits for a strong reference count of two,
  // and a zero word for the unowned reference count.
  // FIXME: Big endian-ness.
  llvm::Constant *headerElts[] = {
    metadata,
    llvm::ConstantInt::get(
        Int32Ty,
        InlineRefCountBits(0 /*unowned ref count*/, 2 /*strong ref count*/)
            .getBitsValue()),
    llvm::ConstantInt::get(Int32Ty, 0)
  };
  auto *header = llvm::ConstantStruct::get(RefCountedStructTy,
                                           makeArrayRef(headerElts));

  // Capacity is the length plus one because of the implicitly added '\0'
  // character.
  const auto length = utf8.size();
  llvm::Constant *storageElts[] = {
    header,
    llvm::ConstantInt::get(Int32Ty, length),     // count
    llvm::ConstantInt::get(Int32Ty, length + 1), // capacity
    llvm::ConstantInt::get(Int8Ty, 0),           // flags
    chars
  };
  auto *initializer =
      llvm::ConstantStruct::get(layoutTy, makeArrayRef(storageElts));

  auto *stringGlobal = new llvm::GlobalVariable(
      Module, initializer->getType(), /*isConstant*/ true,
      llvm::GlobalValue::PrivateLinkage, initializer);
  stringGlobal->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);

  // Remember the global so later requests for the same text reuse it.
  cacheSlot = stringGlobal;
  return stringGlobal;
}
3170+
3171+
/// Return the address of a private, constant global that holds \p utf8
/// transcoded to UTF-16, laid out as a heap object header followed by count,
/// capacity, flags, one padding byte and the null-terminated code units. The
/// global is built on first request and served from
/// GlobalConstantUTF16Strings afterwards.
llvm::Constant *
IRGenModule::getAddrOfGlobalUTF16ConstantString(StringRef utf8) {
  // Hand back the global emitted earlier for this text, if there is one.
  auto &cacheSlot = GlobalConstantUTF16Strings[utf8];
  if (cacheSlot)
    return cacheSlot;

  // Otherwise transcode to UTF-16 first. The buffer has one element per input
  // byte plus one for the terminating null.
  SmallVector<llvm::UTF16, 128> utf16Buf(utf8.size() + 1);
  const llvm::UTF8 *src = reinterpret_cast<const llvm::UTF8 *>(utf8.data());
  const llvm::UTF8 *const srcEnd = src + utf8.size();
  llvm::UTF16 *const bufBegin = &utf16Buf[0];
  llvm::UTF16 *dst = bufBegin;
  llvm::UTF16 *const dstEnd = bufBegin + utf8.size();
  (void) ConvertUTF8toUTF16(&src, srcEnd, &dst, dstEnd,
                            llvm::strictConversion);

  // Number of UTF-16 code units written by the conversion.
  size_t utf16Length = dst - bufBegin;

  // Null-terminate and view the code units including the terminator.
  *dst = 0;
  ArrayRef<llvm::UTF16> codeUnits(bufBegin, utf16Length + 1);

  auto *chars = llvm::ConstantDataArray::get(LLVMContext, codeUnits);

  // Layout: { heap object header, count, capacity, flags, padding,
  //           code units }.
  llvm::Type *layoutElts[] = {
    RefCountedStructTy,
    Int32Ty,
    Int32Ty,
    Int8Ty,
    Int8Ty, // For 16-byte alignment.
    chars->getType()
  };
  auto *layoutTy = llvm::StructType::get(getLLVMContext(), layoutElts,
                                         /*packed*/ false);

  // The first header field refers to the storage class's metadata symbol.
  llvm::Constant *metadata = getMetatypeDeclarationFor(
      *this, REALLY_STRINGIFY(CLASS_METADATA_SYM(s19_UTF16StringStorage)));
  metadata = llvm::ConstantExpr::getBitCast(metadata, TypeMetadataPtrTy);

  // Header: metadata pointer, the bits for a strong reference count of two,
  // and a zero word for the unowned reference count.
  llvm::Constant *headerElts[] = {
    metadata,
    llvm::ConstantInt::get(
        Int32Ty,
        InlineRefCountBits(0 /*unowned ref count*/, 2 /*strong ref count*/)
            .getBitsValue()),
    llvm::ConstantInt::get(Int32Ty, 0)
  };
  auto *header = llvm::ConstantStruct::get(RefCountedStructTy,
                                           makeArrayRef(headerElts));

  // Capacity is the code unit count plus one for the terminating null.
  llvm::Constant *storageElts[] = {
    header,
    llvm::ConstantInt::get(Int32Ty, utf16Length),     // count
    llvm::ConstantInt::get(Int32Ty, utf16Length + 1), // capacity
    llvm::ConstantInt::get(Int8Ty, 0),                // flags
    llvm::ConstantInt::get(Int8Ty, 0),                // padding
    chars
  };
  auto *initializer =
      llvm::ConstantStruct::get(layoutTy, makeArrayRef(storageElts));

  auto *stringGlobal = new llvm::GlobalVariable(
      Module, initializer->getType(), /*isConstant*/ true,
      llvm::GlobalValue::PrivateLinkage, initializer);
  stringGlobal->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);

  // Remember the global so later requests for the same text reuse it.
  cacheSlot = stringGlobal;
  return stringGlobal;
}
3246+
30823247
/// Do we have to use resilient access patterns when working with this
30833248
/// declaration?
30843249
///

lib/IRGen/IRGenModule.cpp

+2-3
Original file line numberDiff line numberDiff line change
@@ -256,8 +256,7 @@ IRGenModule::IRGenModule(IRGenerator &irgen,
256256
});
257257
FullBoxMetadataPtrTy = FullBoxMetadataStructTy->getPointerTo(DefaultAS);
258258

259-
260-
llvm::Type *refCountedElts[] = { TypeMetadataPtrTy, Int32Ty, Int32Ty };
259+
llvm::Type *refCountedElts[] = {TypeMetadataPtrTy, Int32Ty, Int32Ty};
261260
RefCountedStructTy->setBody(refCountedElts);
262261

263262
PtrSize = Size(DataLayout.getPointerSize(DefaultAS));
@@ -351,7 +350,7 @@ IRGenModule::IRGenModule(IRGenerator &irgen,
351350
openedErrorTriple,
352351
/*packed*/ false);
353352
OpenedErrorTriplePtrTy = OpenedErrorTripleTy->getPointerTo(DefaultAS);
354-
353+
355354
InvariantMetadataID = LLVMContext.getMDKindID("invariant.load");
356355
InvariantNode = llvm::MDNode::get(LLVMContext, {});
357356
DereferenceableID = LLVMContext.getMDKindID("dereferenceable");

lib/IRGen/IRGenModule.h

+6
Original file line numberDiff line numberDiff line change
@@ -652,6 +652,8 @@ class IRGenModule {
652652
llvm::Constant *getAddrOfGlobalString(StringRef utf8,
653653
bool willBeRelativelyAddressed = false);
654654
llvm::Constant *getAddrOfGlobalUTF16String(StringRef utf8);
655+
llvm::Constant *getAddrOfGlobalConstantString(StringRef utf8);
656+
llvm::Constant *getAddrOfGlobalUTF16ConstantString(StringRef utf8);
655657
llvm::Constant *getAddrOfObjCSelectorRef(StringRef selector);
656658
llvm::Constant *getAddrOfObjCMethodName(StringRef methodName);
657659
llvm::Constant *getAddrOfObjCProtocolRecord(ProtocolDecl *proto,
@@ -693,6 +695,10 @@ class IRGenModule {
693695
llvm::StringMap<llvm::Constant*> ObjCSelectorRefs;
694696
llvm::StringMap<llvm::Constant*> ObjCMethodNames;
695697

698+
/// Maps to constant swift 'String's.
699+
llvm::StringMap<llvm::Constant*> GlobalConstantStrings;
700+
llvm::StringMap<llvm::Constant*> GlobalConstantUTF16Strings;
701+
696702
/// LLVMUsed - List of global values which are required to be
697703
/// present in the object file; bitcast to i8*. This is used for
698704
/// forcing visibility of symbols which may otherwise be optimized

lib/IRGen/IRGenSIL.cpp

+17
Original file line numberDiff line numberDiff line change
@@ -822,6 +822,7 @@ class IRGenSILFunction :
822822
void visitIntegerLiteralInst(IntegerLiteralInst *i);
823823
void visitFloatLiteralInst(FloatLiteralInst *i);
824824
void visitStringLiteralInst(StringLiteralInst *i);
825+
void visitConstStringLiteralInst(ConstStringLiteralInst *i);
825826

826827
void visitLoadInst(LoadInst *i);
827828
void visitStoreInst(StoreInst *i);
@@ -2343,6 +2344,22 @@ void IRGenSILFunction::visitStringLiteralInst(swift::StringLiteralInst *i) {
23432344
setLoweredExplosion(i, e);
23442345
}
23452346

2347+
/// Lower a const_string_literal: fetch the constant string global matching
/// the instruction's encoding and record its address, cast to i8*, as the
/// instruction's lowered value.
void IRGenSILFunction::visitConstStringLiteralInst(
    swift::ConstStringLiteralInst *i) {
  const bool wantsUTF8 =
      i->getEncoding() == ConstStringLiteralInst::Encoding::UTF8;

  // Any encoding other than UTF8 is emitted through the UTF-16 path.
  llvm::Constant *storage =
      wantsUTF8 ? IGM.getAddrOfGlobalConstantString(i->getValue())
                : IGM.getAddrOfGlobalUTF16ConstantString(i->getValue());

  Explosion lowered;
  lowered.add(llvm::ConstantExpr::getBitCast(storage, IGM.Int8PtrTy));
  setLoweredExplosion(i, lowered);
}
2362+
23462363
void IRGenSILFunction::visitUnreachableInst(swift::UnreachableInst *i) {
23472364
Builder.CreateUnreachable();
23482365
}

0 commit comments

Comments
 (0)