Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

discussing changes to LanguageAnalyzer #788

Open
wants to merge 10 commits into
base: main
Choose a base branch
from
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ This section is for maintaining a changelog for all breaking changes for the cli
### Added
- Document HTTP/2 support ([#330](https://github.com/opensearch-project/opensearch-java/pull/330))
- Expose HTTP status code through `ResponseException#status` ([#756](https://github.com/opensearch-project/opensearch-java/pull/756))
- Add support for 33 new language analyzers; previously only Dutch was supported ([#779](https://github.com/opensearch-project/opensearch-java/pull/779))

### Dependencies

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@
package org.opensearch.client.opensearch._types.analysis;

import jakarta.json.stream.JsonGenerator;

import java.util.Arrays;
import java.util.function.Function;
import org.opensearch.client.json.JsonEnum;
import org.opensearch.client.json.JsonpDeserializable;
Expand Down Expand Up @@ -62,8 +64,6 @@ public class Analyzer implements TaggedUnion<Analyzer.Kind, AnalyzerVariant>, Js
public enum Kind implements JsonEnum {
Custom("custom"),

Dutch("dutch"),

Fingerprint("fingerprint"),

IcuAnalyzer("icu_analyzer"),
Expand All @@ -72,8 +72,6 @@ public enum Kind implements JsonEnum {

Kuromoji("kuromoji"),

Language("language"),

Nori("nori"),

Pattern("pattern"),
Expand All @@ -92,6 +90,74 @@ public enum Kind implements JsonEnum {

Cjk("cjk"),

Arabic("arabic"),
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Curious: why do we need to mirror the Language enumeration here?


Armenian("armenian"),

Basque("basque"),

Bengali("bengali"),

Brazilian("brazilian"),

Bulgarian("bulgarian"),

Catalan("catalan"),

Czech("czech"),

Danish("danish"),

Dutch("dutch"),

English("english"),

Estonian("estonian"),

Finnish("finnish"),

French("french"),

Galician("galician"),

German("german"),

Greek("greek"),

Hindi("hindi"),

Hungarian("hungarian"),

Indonesian("indonesian"),

Irish("irish"),

Italian("italian"),

Latvian("latvian"),

Lithuanian("lithuanian"),

Norwegian("norwegian"),

Persian("persian"),

Portuguese("portuguese"),

Romanian("romanian"),

Russian("russian"),

Sorani("sorani"),

Spanish("spanish"),

Swedish("swedish"),

Turkish("turkish"),

Thai("thai"),

;

private final String jsonValue;
Expand Down Expand Up @@ -154,23 +220,6 @@ public CustomAnalyzer custom() {
return TaggedUnionUtils.get(this, Kind.Custom);
}

/**
 * Tells whether this union currently holds the {@code dutch} variant.
 *
 * @return {@code true} when the stored kind is {@code Kind.Dutch}
 */
public boolean isDutch() {
    return Kind.Dutch == _kind;
}

/**
 * Returns the value held by this union as the {@code dutch} variant.
 *
 * @return the {@code dutch} variant value
 * @throws IllegalStateException
 *             if the current variant is not of the {@code dutch} kind.
 */
public DutchAnalyzer dutch() {
    final DutchAnalyzer variant = TaggedUnionUtils.get(this, Kind.Dutch);
    return variant;
}

/**
* Is this variant instance of kind {@code fingerprint}?
*/
Expand Down Expand Up @@ -239,22 +288,6 @@ public KuromojiAnalyzer kuromoji() {
return TaggedUnionUtils.get(this, Kind.Kuromoji);
}

/**
 * Tells whether this union currently holds the {@code language} variant.
 *
 * @return {@code true} when the stored kind is {@code Kind.Language}
 */
public boolean isLanguage() {
    return Kind.Language == _kind;
}

/**
 * Returns the value held by this union as the {@code language} variant.
 *
 * @return the {@code language} variant value
 * @throws IllegalStateException
 *             if the current variant is not of the {@code language} kind.
 */
public LanguageAnalyzer language() {
    final LanguageAnalyzer variant = TaggedUnionUtils.get(this, Kind.Language);
    return variant;
}

/**
* Is this variant instance of kind {@code nori}?
Expand Down Expand Up @@ -399,16 +432,6 @@ public boolean isCjk() {
return _kind == Kind.Cjk;
}

/**
 * Returns the value held by this union as the {@code cjk} variant.
 *
 * @return the {@code cjk} variant value
 * @throws IllegalStateException
 *             if the current variant is not of the {@code cjk} kind.
 */
public CjkAnalyzer cjk() {
    final CjkAnalyzer variant = TaggedUnionUtils.get(this, Kind.Cjk);
    return variant;
}

@Override
public void serialize(JsonGenerator generator, JsonpMapper mapper) {

Expand All @@ -430,16 +453,6 @@ public ObjectBuilder<Analyzer> custom(Function<CustomAnalyzer.Builder, ObjectBui
return this.custom(fn.apply(new CustomAnalyzer.Builder()).build());
}

/**
 * Selects the {@code dutch} variant and stores the given analyzer as its value.
 *
 * @param v the analyzer to store as the union value
 * @return this builder
 */
public ObjectBuilder<Analyzer> dutch(DutchAnalyzer v) {
    // The two assignments are independent; value first, then the tag.
    this._value = v;
    this._kind = Kind.Dutch;
    return this;
}

/**
 * Selects the {@code dutch} variant, building its value by applying {@code fn}
 * to a new {@code DutchAnalyzer.Builder}.
 *
 * @param fn function that configures the fresh builder and returns the builder to build from
 * @return this builder
 */
public ObjectBuilder<Analyzer> dutch(Function<DutchAnalyzer.Builder, ObjectBuilder<DutchAnalyzer>> fn) {
    final DutchAnalyzer.Builder fresh = new DutchAnalyzer.Builder();
    final DutchAnalyzer built = fn.apply(fresh).build();
    return this.dutch(built);
}

public ObjectBuilder<Analyzer> fingerprint(FingerprintAnalyzer v) {
this._kind = Kind.Fingerprint;
this._value = v;
Expand Down Expand Up @@ -481,7 +494,7 @@ public ObjectBuilder<Analyzer> kuromoji(Function<KuromojiAnalyzer.Builder, Objec
}

/**
 * Stores the given language analyzer as the union value and derives the kind
 * from the analyzer's own language.
 * <p>
 * The kind is resolved with {@code Kind.valueOf} on the name of
 * {@code v.language()}.
 * NOTE(review): this assumes every {@code Language} constant has a
 * same-named {@code Kind} constant; confirm against both enums.
 *
 * @param v the language analyzer to store; its {@code language()} selects the kind
 * @return this builder
 * @throws IllegalArgumentException
 *             if {@code Kind} has no constant with the language's name
 */
public ObjectBuilder<Analyzer> language(LanguageAnalyzer v) {
    // Single assignment: the earlier store of Kind.Language was dead, being
    // overwritten on the very next line.
    this._kind = Kind.valueOf(v.language().name());
    this._value = v;
    return this;
}
Expand Down Expand Up @@ -570,16 +583,6 @@ public ObjectBuilder<Analyzer> smartcn() {
return this.smartcn(new SmartcnAnalyzer.Builder().build());
}

/**
 * Selects the {@code cjk} variant and stores the given analyzer as its value.
 *
 * @param v the analyzer to store as the union value
 * @return this builder
 */
public ObjectBuilder<Analyzer> cjk(CjkAnalyzer v) {
    // The two assignments are independent; value first, then the tag.
    this._value = v;
    this._kind = Kind.Cjk;
    return this;
}

/**
 * Selects the {@code cjk} variant, building its value by applying {@code fn}
 * to a new {@code CjkAnalyzer.Builder}.
 *
 * @param fn function that configures the fresh builder and returns the builder to build from
 * @return this builder
 */
public ObjectBuilder<Analyzer> cjk(Function<CjkAnalyzer.Builder, ObjectBuilder<CjkAnalyzer>> fn) {
    final CjkAnalyzer.Builder fresh = new CjkAnalyzer.Builder();
    final CjkAnalyzer built = fn.apply(fresh).build();
    return this.cjk(built);
}

public Analyzer build() {
_checkSingleUse();
return new Analyzer(this);
Expand All @@ -589,8 +592,11 @@ public Analyzer build() {

protected static void setupAnalyzerDeserializer(ObjectDeserializer<Builder> op) {

for (Language value : Language.values()) {
op.add(Builder::language, LanguageAnalyzer._DESERIALIZER, value.jsonValue().toLowerCase());
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You already did that, right?

Suggested change
op.add(Builder::language, LanguageAnalyzer._DESERIALIZER, value.jsonValue().toLowerCase());
op.add(Builder::language, LanguageAnalyzer._DESERIALIZER, value.jsonValue());

// TODO: should the lowercasing be done in the Language enum instead?
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The Language jsonValue has to match the Kind jsonValue,
so we register the deserializers for all the languages here.

}
op.add(Builder::custom, CustomAnalyzer._DESERIALIZER, "custom");
op.add(Builder::dutch, DutchAnalyzer._DESERIALIZER, "dutch");
op.add(Builder::fingerprint, FingerprintAnalyzer._DESERIALIZER, "fingerprint");
op.add(Builder::icuAnalyzer, IcuAnalyzer._DESERIALIZER, "icu_analyzer");
op.add(Builder::keyword, KeywordAnalyzer._DESERIALIZER, "keyword");
Expand All @@ -604,7 +610,6 @@ protected static void setupAnalyzerDeserializer(ObjectDeserializer<Builder> op)
op.add(Builder::stop, StopAnalyzer._DESERIALIZER, "stop");
op.add(Builder::whitespace, WhitespaceAnalyzer._DESERIALIZER, "whitespace");
op.add(Builder::smartcn, SmartcnAnalyzer._DESERIALIZER, Kind.Smartcn.jsonValue());
op.add(Builder::cjk, CjkAnalyzer._DESERIALIZER, Kind.Cjk.jsonValue());

op.setTypeProperty("type", null);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,14 +46,6 @@ public static CustomAnalyzer.Builder custom() {
return new CustomAnalyzer.Builder();
}

/**
 * Returns a new builder for the {@link DutchAnalyzer dutch} variant of
 * {@code Analyzer}.
 *
 * @return a new {@code DutchAnalyzer.Builder}
 */
public static DutchAnalyzer.Builder dutch() {
    final DutchAnalyzer.Builder builder = new DutchAnalyzer.Builder();
    return builder;
}

/**
* Creates a builder for the {@link FingerprintAnalyzer fingerprint}
* {@code Analyzer} variant.
Expand Down
Loading
Loading